#####################################################################
#####         Replicate main data analyses in paper             #####
#####       "The Aggregate Costs of Political Connections"      #####
#####     Public Use Version using de-identified Micro data     #####
#####           Date created: 22.09.2025, Jonas Gathen          #####
#####################################################################

#################################
###### 1. Data preparation ###### 
#################################

## To run this script, make sure that all packages are installed. 

##### Libraries #####

# markdown & base packages
library(bookdown)
library(tidyverse)
options(dplyr.summarise.inform = FALSE) # to hide summarise info
library(knitr)
library(haven)

# plotting packages
library(ggridges)
library(gridExtra)
library(ggnewscale)
library(ggpubr)
library(extrafont) 
loadfonts()

# table package
library(kableExtra)
library(gtools)

# extra matrix operations
library(matrixStats)

# nonlinear roots
library(nleqslv)
library(lbfgsb3c)
library(optimParallel)

# paralellization 
library(foreach)
library(doParallel)

# for unobserved heterogeneity
library(mvtnorm)

# for generalizes simulated annealing solver
library(GenSA)

##### Main parameters #####

# Stationary case: interest rate fixed by HH preferences
beta <- 0.95
r <- 1/beta - 1
delta <- 0.1
R <- r + delta
wage <- 1
P <- 1.0 

# tax system 
vat_tax <- 0.1 
profit_tax <- 0.2 

##### Load data #####

# load main data 
firms_1997 <- read_csv(file = "R/data/firm_replication_data.csv")

# load other data needed 
log_z_star_NC_survivors <- read_csv(file = "R/data/log_z_star_NC_survivors.csv")$log_z_star_NC_survivors
log_z_star_next_NC_survivors <- read_csv(file = "R/data/log_z_star_next_NC_survivors.csv")$log_z_star_next_NC_survivors

# load gross elasticities 
gross_elasticities <- read_csv(file = "R/data/gross_elasticities.csv")

###########################################################
###### Section 2: Political Connections in Indonesia ###### 
###########################################################

########### Differences between connected and non-connected firms ###########

####### Figure 1 (gross output) #######

# Get number of observations
nrow_non_connected <- nrow(firms_1997[!is.na(firms_1997$routs) & firms_1997$connected == "non-connected",])
nrow_connected <- nrow(firms_1997[!is.na(firms_1997$routs) & firms_1997$connected == "connected",])

# Dispersion
sd(firms_1997$routs[firms_1997$connected == "connected"])/mean(firms_1997$routs[firms_1997$connected == "connected"])
sd(firms_1997$routs[firms_1997$connected == "non-connected"])/mean(firms_1997$routs[firms_1997$connected == "non-connected"])

# Real gross output
firms_1997 %>% 
  mutate(connections = fct_rev(as.factor(connected)), 
         connected = case_when(
           connected == "connected" ~ "C", 
           connected == "non-connected" ~ "NC")) %>% 
  ggplot(aes(y = connected, group = connected, fill = connected)) + 
  geom_density_ridges(
    aes(x = routs/9040, # exchange rate with USD in 2010 (in 000's)
        fill = connections),
    alpha = 0.8, color = "black", quantile_lines = TRUE, quantiles = 2, show.legend = TRUE) + 
  scale_x_continuous(name = "Sales (in 000' USD)", trans = "log2",
                     breaks = c(1,10,100,1000,10000,100000,1000000),
                     labels = c("1","10","100","1k","10k","100k","1mio"),
                     limits = c(2.5,2500000)) + ylab("") + 
  theme_classic(base_size = 14, base_family = "LM Roman 10") + 
  scale_fill_discrete(name = "Type:") + 
  theme(legend.position = "bottom")

# PDF
ggsave("R/output/Figure1.pdf", width = 6, height = 4, units = "in")


####### Table 1: Requires info on industries, not provided here! #######

####### Figure A.1 (value-added in Appendix) #######

# Dispersion in value added: non-connected are 4x more dispersed 
sd(firms_1997$rvads[firms_1997$connected == "connected"])/mean(firms_1997$rvads[firms_1997$connected == "connected"])
sd(firms_1997$rvads[firms_1997$connected == "non-connected"])/mean(firms_1997$rvads[firms_1997$connected == "non-connected"])

# Real value-added output
firms_1997 %>% 
  mutate(connections = fct_rev(as.factor(connected)), 
         connected = case_when(
           connected == "connected" ~ "C", 
           connected == "non-connected" ~ "NC")) %>%
  ggplot(aes(y = connected, group = connected, fill = connected)) + 
  geom_density_ridges(
    aes(x = rvads/9040, # exchange rate with USD in 2010 (in 000's)
        fill = connections),
    alpha = 0.8, color = "black", quantile_lines = TRUE, quantiles = 2, show.legend = TRUE) + 
  scale_x_continuous(name = "(real) value-added production output (in '000 USD)", trans = "log2",
                     breaks = c(1,10,100,1000,10000,100000,1000000),
                     labels = c("1","10","100","1k","10k","100k","1mio"),
                     limits = c(0.25,2500000)) + ylab("") + 
  theme_classic(base_size = 14, base_family = "LM Roman 10") + 
  scale_fill_discrete(name = "Type:") + 
  theme(legend.position = "bottom")

############################################################
###### Section 3: Quantifying the role of connections ###### 
############################################################

####### TFPQ distribution #######

#### Preparation: get gross output elasticities & TFPQ measure ####

# save elasticities 
alpha_gross <- gross_elasticities$med_cap_share_1digit_gross/(1-vat_tax)
beta_gross <- gross_elasticities$med_lab_share_1digit_gross/(1-vat_tax)
gamma_gross <- gross_elasticities$med_mat_share_1digit_gross

eta_tilde_baseline <- alpha_gross + beta_gross + gamma_gross

### Compute TFPQ 

# compute TFPQ using gross output first 
firms_1997 <- firms_1997 %>% 
  mutate(
    log_TFPQ_gross = log(routs) - alpha_gross*log(alpha_gross*(1-vat_tax)*routs/R) - beta_gross*log(beta_gross*(1-vat_tax)*routs) - gamma_gross*log(gamma_gross*routs), 
    TFPQ = exp(log_TFPQ_gross))

# get residualized TFPQ measure 
firms_1997$TFPQ_resid <- exp(firms_1997$log_TFPQ_resid)

#### Figure 2 (left panel): residualized TFPQ distributions (gross output) #######

# TFPQ distribution 
plot_TFPQ_distrib <- firms_1997 %>% 
  mutate(
    connections = fct_rev(as.factor(connected)), 
    connected = case_when(
      connected == "connected" ~ "C", 
      connected == "non-connected" ~ "NC")) %>% 
  ggplot(aes(y = connected, group = connected, fill = connected)) + 
  geom_density_ridges(
    aes(x = log_TFPQ_resid, # divide by 9040 to show in 2010 USD
        fill = connections),
    alpha = 0.8, color = "black", quantile_lines = TRUE, quantiles = 2, 
    show.legend = TRUE, bandwidth = 0.075) + 
  labs(
    x = "log TFPQ",
    y = ""
  ) + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  scale_fill_discrete(name = "Type:") + 
  theme(legend.position = "bottom")

#### Figure 2 (right panel): TFPQ_QR distribution implied by resid TFPQ ####

# Get baseline TFPQ_QR function
compute_baseline_TFPQ_QR <- function(data_connect,data_nonconnect, cutoff){
  
  # Arrange from low to high
  data_connect <- data_connect %>%
    arrange(TFPQ_resid)
  
  # get rows of connected firms 
  n_connect <- nrow(data_connect)
  
  ## First get restricted productivity distribution
  
  ### Need to have specified cutoff! ### 
  
  # Now restrict normally 
  restricted_productivity_distribution <- data_nonconnect %>% 
    # only look at TFPQ
    select(c(TFPQ_resid)) %>% 
    # order by TFPQ
    arrange(TFPQ_resid) %>% 
    # drop cutoff*% of observations
    dplyr::slice(round(cutoff*n()):n())
  
  restricted_productivity_distribution <- restricted_productivity_distribution$TFPQ_resid
  
  # Now get quantiles (weighted by rho if supplied)
  z_connected <- unname(quantile(
    x = restricted_productivity_distribution,
    probs = c(1:n_connect)/(n_connect+1),
  ))
  TFPQ_QR_connected <- data_connect$TFPQ_resid/z_connected - 1 
  
  # return results
  return(list(productivity = z_connected, TFPQ_QR = TFPQ_QR_connected))
}

# Get bootstrapping TFPQ_QR function 
compute_bs_baseline_TFPQ_QR <- function(data_connect,data_nonconnect, cutoff){
  
  # create productivity variable
  data_connect$productivity <- NA
  
  # Arrange from high to low
  data_connect <- data_connect %>%
    arrange(desc(TFPQ_resid))
  
  ## First get restricted productivity distribution
  
  # Now restrict normally 
  restricted_productivity_distribution <- data_nonconnect %>% 
    # only look at TFPR
    select(c(TFPQ_resid)) %>% 
    # order by TFPR
    arrange(TFPQ_resid) %>% 
    # drop cutoff*% of observations
    dplyr::slice(round(cutoff*n()):n())
  
  ## Now, draw with replacement from restricted productivity distribution
  
  n_draws <- nrow(data_connect)
  
  # set seeds
  set.seed(123456)
  n_bootstraps = 10000
  bootstrap_seeds = rnorm(n = n_bootstraps, mean = 1000, sd = 100)
  
  # Get sampling function 
  take_sample <- Vectorize(function(data,seed,n_draws){
    set.seed(seed)
    return(sort(sample(x = data, size = n_draws, replace = TRUE), decreasing = T))
  }, vectorize.args = c("seed"))
  
  # bootstrap samples
  bs_productivities <- take_sample(data = restricted_productivity_distribution$TFPQ_resid, 
                                   seed = bootstrap_seeds, n_draws = n_draws)
  
  bs_TFPQ_QR <- data_connect$TFPQ_resid/bs_productivities - 1
  
  # return results
  return(bs_TFPQ_QR)
}

# baseline TFPQ_QR (without fixed cost)
baseline_TFPQ_QR_noF <- compute_baseline_TFPQ_QR(
  data_connect = firms_1997[firms_1997$connected == "connected",], 
  data_nonconnect = firms_1997[firms_1997$connected == "non-connected",], 
  cutoff = 0.0)

baseline_TFPQ_QR_noF_bs <- compute_bs_baseline_TFPQ_QR(
  data_connect = firms_1997[firms_1997$connected == "connected",], 
  data_nonconnect = firms_1997[firms_1997$connected == "non-connected",], 
  cutoff = 0.0)

# baseline TFPQ_QR (max fixed cost)
max_cutoff <- (ecdf(firms_1997$TFPQ_resid[firms_1997$connected == "non-connected"]))(
  min(firms_1997$TFPQ_resid[firms_1997$connected == "connected"])
)

baseline_TFPQ_QR_maxF <- compute_baseline_TFPQ_QR(
  data_connect = firms_1997[firms_1997$connected == "connected",], 
  data_nonconnect = firms_1997[firms_1997$connected == "non-connected",], 
  cutoff = max_cutoff)

baseline_TFPQ_QR_maxF_bs <- compute_bs_baseline_TFPQ_QR(
  data_connect = firms_1997[firms_1997$connected == "connected",], 
  data_nonconnect = firms_1997[firms_1997$connected == "non-connected",], 
  cutoff = max_cutoff)

## Summarize bootstrapped confidence bands  
CI_baseline_TFPQ_QR_bs_upper <- tibble(
  productivity_order = c(241:1),
  `2.5%` = t(apply(baseline_TFPQ_QR_noF_bs, 1, quantile, probs = c(0.05, 0.95),  na.rm = TRUE))[,1],
  `97.5%` = t(apply(baseline_TFPQ_QR_noF_bs, 1, quantile, probs = c(0.05, 0.95),  na.rm = TRUE))[,2]
) %>% pivot_longer(cols = c(`2.5%`,`97.5%`), names_to = "Percentiles", values_to = "TFPQ_QR")

CI_baseline_TFPQ_QR_bs_lower <- tibble(
  productivity_order = c(241:1),
  `2.5%` = t(apply(baseline_TFPQ_QR_maxF_bs, 1, quantile, probs = c(0.05, 0.95),  na.rm = TRUE))[,1],
  `97.5%` = t(apply(baseline_TFPQ_QR_maxF_bs, 1, quantile, probs = c(0.05, 0.95),  na.rm = TRUE))[,2]
) %>% pivot_longer(cols = c(`2.5%`,`97.5%`), names_to = "Percentiles", values_to = "TFPQ_QR")


# Plot estimated TFPQ_QR schedule  
plot_baseline_TFPQ_QR_bound <- ggplot() + 
  geom_line(data = CI_baseline_TFPQ_QR_bs_upper, aes(x = (productivity_order/241)*100, y = TFPQ_QR, group = Percentiles), 
            alpha = 0.5, linetype = "dashed") + 
  geom_line(data = CI_baseline_TFPQ_QR_bs_lower, aes(x = (productivity_order/241)*100, y = TFPQ_QR, group = Percentiles), 
            alpha = 0.5, linetype = "dashed") + 
  geom_line(aes(x = (c(1:241)/241)*100, y = baseline_TFPQ_QR_noF$TFPQ_QR, color = "Upper"), linewidth = 1) + 
  geom_line(aes(x = (c(1:241)/241)*100, y = baseline_TFPQ_QR_maxF$TFPQ_QR, color = "Lower"), linewidth = 1) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "TFPQ Quantile Ratio") + 
  xlab(label = "TFPQ Percentile") + 
  scale_color_discrete(name = "Bound:") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  theme(legend.position = "bottom") + 
  ylim(c(0,0.65))

#### Figure 2 joint plot #### 

ggarrange(
  plot_TFPQ_distrib + labs(caption="(A) TFPQ distributions C vs NC") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))), 
  plot_baseline_TFPQ_QR_bound + labs(caption="(B) TFPQ quantile ratio (with bounds)") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))),
  ncol = 2, nrow = 1)  
ggsave(width = 12, height = 5, dpi = 200,
       "R/output/Figure2.pdf") 

#################################
###### Estimation of model ######
#################################

################ Baseline results Connections Technology ################

#### Prepare objects & set grid length ####

# x_star (needed to move between z_star to z_tilde)
x_star_baseline <- ( (((1-vat_tax)*alpha_gross/R)^alpha_gross)*(((1-vat_tax)*beta_gross/wage)^beta_gross)*((gamma_gross/P)^gamma_gross) )

# create z_tilde_function
z_tilde_baseline <- function(z_star){
  return( (z_star*x_star_baseline)^(1/(1-eta_tilde_baseline)) )
}

# specify grid length for dense and non-dense
grid_length <- 100
grid_dense_length <- 1000

# targeted moment 
empirical_QR_baseline <- baseline_TFPQ_QR_noF$TFPQ_QR

# Extract z_star_NC
z_star_NC_baseline <- firms_1997$TFPQ_resid[firms_1997$connected == "non-connected"]

# get density distribution over z_star_NC
z_star_marginal_distrib_baseline <- approxfun(density(log(z_star_NC_baseline)))(log(z_star_NC_baseline))

# get quantiles of non-connected 
quantiles_non_connected_TFPQ_data_baseline <- unname(quantile(
  x = firms_1997$TFPQ_resid[firms_1997$connected == "non-connected"],
  probs = c(1:nrow_connected)/(nrow_connected+1),
))  

# specify grid here!! Want to range over z_i for non-connected firms (don't need very many points!) 
z_star_grid_baseline <- seq(from = unname(quantile(z_star_NC_baseline,probs = 0.0)), 
                            to = unname(quantile(z_star_NC_baseline,probs = 1)), 
                            length.out = grid_length)

z_star_grid_dense_baseline <- seq(from = unname(quantile(z_star_NC_baseline,probs = 0.0)), 
                                  to = unname(quantile(z_star_NC_baseline,probs = 1)), 
                                  length.out = grid_dense_length)

z_tilde_grid_baseline <- z_tilde_baseline(z_star = z_star_grid_baseline)

z_tilde_grid_dense_baseline <- z_tilde_baseline(z_star = z_star_grid_dense_baseline)

# get density distribution over z_star grids 
z_star_grid_marginal_distrib_baseline <- approxfun(density(z_star_NC_baseline))(z_star_grid_baseline)
z_star_grid_marginal_distrib_baseline <- z_star_grid_marginal_distrib_baseline/sum(z_star_grid_marginal_distrib_baseline)

z_star_grid_dense_marginal_distrib_baseline <- approxfun(density(z_star_NC_baseline))(z_star_grid_dense_baseline)
z_star_grid_dense_marginal_distrib_baseline <- z_star_grid_dense_marginal_distrib_baseline/sum(z_star_grid_dense_marginal_distrib_baseline)

# need this for later
lowest_TFPQ_connected_baseline <- min(firms_1997$TFPQ_resid[firms_1997$connected == "connected"])

# elasticity ratio
elasticity_ratio_baseline <- eta_tilde_baseline/(1-eta_tilde_baseline)

### Create z_star vectors 

# z_star grid long 
z_star_grid_long_baseline <- (crossing(
  z_star = z_star_grid_baseline,
  epsilon = c(1:grid_length)))$z_star 

# z_star grid dense long 
z_star_grid_dense_long_baseline <- (crossing(
  z_star = z_star_grid_dense_baseline,
  epsilon = c(1:grid_dense_length)))$z_star 

# also make z star grid marginal distrib long
z_star_grid_dense_marginal_distrib_long_baseline <- approxfun(density(z_star_NC_baseline))(z_star_grid_dense_long_baseline)

# can create z_tilde outside the loop
z_tilde_grid_long_baseline <- z_tilde_baseline(z_star = z_star_grid_long_baseline)
z_tilde_grid_dense_long_baseline <- z_tilde_baseline(z_star = z_star_grid_dense_long_baseline)

# can create revenue & profits NC outside the loop 
revenue_NC_z_star_grid_long_baseline <- z_tilde_grid_long_baseline
profits_NC_z_star_grid_long_baseline <- (1-profit_tax)*(1-eta_tilde_baseline)*z_tilde_grid_long_baseline

revenue_NC_z_star_grid_dense_long_baseline <- z_tilde_grid_dense_long_baseline
profits_NC_z_star_grid_dense_long_baseline <- (1-profit_tax)*(1-eta_tilde_baseline)*z_tilde_grid_dense_long_baseline

# can also create revenue NC outside the loop

# get targeted moment: connections TFPQ distribution  
quantiles_connected_TFPQ_data_baseline <- sort(firms_1997$TFPQ_resid[firms_1997$connected == "connected"])

# express fixed cost in terms of share of NC profits at quantile cutoff 
quantile_cutoff_baseline <- (ecdf(firms_1997$TFPQ_resid[firms_1997$connected == "non-connected"]))(min(firms_1997$TFPQ_resid[firms_1997$connected == "connected"]))
fixed_cost_profit_normalization <- profits_NC_z_star_grid_long_baseline[which.min(x = abs(z_star_grid_long_baseline - quantile(z_star_NC_baseline, probs = quantile_cutoff_baseline)))]


#### Create functions to solve for optimal subsidies over grid (create both DRS & DRS+convex) ####

# for DRS model
compute_subsidy_DRS_eps_model_grid <- function(z_star_grid, epsilon_grid, z_tilde_grid, theta, elasticity_ratio = elasticity_ratio_baseline, eta_tilde = eta_tilde_baseline){
  
  #### Step 1: Preparation ####
  
  # specify FOC 
  rent_seeking_FOC <- function(m_R,z_tilde_value,epsilon){
    return( z_tilde_value*( (1+epsilon*((m_R)^(theta)))^elasticity_ratio )*theta*epsilon*((m_R)^(theta-1)) - P )
  }
  
  # for lower bound of root finding: use something close to zero 
  lower_bound <- 1e-32
  
  # pick high number for upper bound 
  upper_bound <- 1e+32
  
  #### Step 2: Solve for m_R* using FOCs ####
  
  get_optimal_m_R <- Vectorize(FUN = function(epsilon,z_tilde_value){
    
    # Compute optimal m_R
    optimal_m_R <- (uniroot(
      f = function(x){rent_seeking_FOC(m_R = x, z_tilde_value = z_tilde_value, epsilon = epsilon)},
      interval = c(lower_bound,upper_bound),
      extendInt = "downX", # extend interval in case initial doesnt work (in case upper bound not large enough?)
      tol = 0.0001,
      maxiter = 10000))$root
    
    # return optimal m_R
    return(optimal_m_R)
  }, vectorize.args = c("epsilon","z_tilde_value"))
  
  optimal_m_R <- get_optimal_m_R(epsilon = epsilon_grid, z_tilde_value = z_tilde_grid)
  
  #### Step 3: Solve for remaining objects needed ####
  
  final_results <- tibble(z_star = z_star_grid,
                          epsilon = epsilon_grid,
                          optimal_m_R = optimal_m_R,
                          optimal_subsidy = epsilon_grid*(optimal_m_R^theta),
                          optimal_revenue = z_tilde_grid*( (1 + optimal_subsidy )^(elasticity_ratio + 1) ),
                          optimal_profits_C = (1-profit_tax)*((1-eta_tilde)*optimal_revenue - optimal_m_R),
                          optimal_profits_NC = (1-profit_tax)*(1-eta_tilde)*z_tilde_grid,
                          TFPQ_model = (1+optimal_subsidy)*z_star_grid,
                          revenue_output = z_tilde_grid*((1+optimal_subsidy)^elasticity_ratio)
  )
  
  #### Return final results ####
  
  return(final_results)
}

# for DRS+convex model 
compute_subsidy_DRS_eps_cost_model_grid <- function(z_star_grid, epsilon_grid, z_tilde_grid, parameters, elasticity_ratio = elasticity_ratio_baseline, eta_tilde = eta_tilde_baseline){
  
  #### Step 1: Preparation ####
  
  # get parameters 
  theta <- unname(parameters["theta"])
  cost_level <- unname(parameters["cost_level"])
  cost_elast <- unname(parameters["cost_elasticity"])
  fixed_cost <- fixed_cost_profit_normalization*unname(parameters["fixed_cost"])
  
  # specify FOC 
  rent_seeking_FOC <- function(m_R,z_tilde_value,epsilon){
    return( z_tilde_value*( (1+(epsilon*((m_R)^(theta)) - cost_level*(m_R^cost_elast)))^elasticity_ratio )*( (theta*epsilon*((m_R)^(theta-1)) - cost_level*cost_elast*(m_R^(cost_elast-1))) ) - 1 )
  }
  
  # for lower bound of root finding: use something close to zero 
  lower_bound <- 1e-32
  
  #### Step 2: Solve for m_R* using FOCs ####
  
  get_optimal_m_R <- Vectorize(FUN = function(epsilon,z_tilde_value){
    
    # upper_bound: can pick this as max (never want to spend beyond max subsidy (without constraints))
    upper_bound <- ((theta*epsilon)/(cost_level*cost_elast))^(1/(cost_elast - theta))
    
    # Compute optimal m_R (if epsilon < cost_level, then optimum between 0 & 1 and can just set to max subsidy given low cost)
    optimal_m_R <- (uniroot(
      f = function(x){rent_seeking_FOC(m_R = x, z_tilde_value = z_tilde_value, epsilon = epsilon)},
      interval = c(lower_bound,upper_bound),
      extendInt = "downX", # "downX" extend interval in case initial doesnt work (in case upper bound not large enough?)
      tol = 0.00001,
      maxiter = 10000))$root
    
    # return optimal m_R
    return(optimal_m_R)
  }, vectorize.args = c("epsilon","z_tilde_value"))
  
  optimal_m_R <- get_optimal_m_R(epsilon = epsilon_grid, z_tilde_value = z_tilde_grid)
  
  #### Step 3: Solve for remaining objects needed ####
  
  final_results <- tibble(z_star = z_star_grid,
                          epsilon = epsilon_grid,
                          z_tilde = z_tilde_grid,
                          optimal_m_R = optimal_m_R,
                          optimal_subsidy = epsilon_grid*(optimal_m_R^theta) - cost_level*(optimal_m_R^cost_elast),
                          optimal_revenue = z_tilde_grid*( (1 + optimal_subsidy )^(elasticity_ratio + 1) ),
                          optimal_profits_C = (1-profit_tax)*((1-eta_tilde)*optimal_revenue - optimal_m_R),
                          optimal_profits_NC = (1-profit_tax)*(1-eta_tilde)*z_tilde_grid,
                          TFPQ_model = (1+optimal_subsidy)*z_star_grid) 
  
  final_results <- final_results %>% 
    mutate(
      # update based on fixed cost & selection into connections
      choose_C = optimal_profits_C - fixed_cost > optimal_profits_NC,
      optimal_subsidy = ifelse(choose_C,optimal_subsidy,0.0),
      optimal_m_R = ifelse(choose_C, optimal_m_R, 0.0),
      optimal_revenue = ifelse(choose_C, optimal_revenue, z_tilde),
      optimal_profits = ifelse(choose_C, optimal_profits_C - (1-profit_tax)*fixed_cost, optimal_profits_NC),
      TFPQ_model = (1+optimal_subsidy)*z_star, 
      revenue_output_C = z_tilde_grid*((1+optimal_subsidy)^elasticity_ratio), 
      revenue_output_NC = z_tilde_grid
    )
  
  #### Return final results ####
  
  return(final_results)
}

#### Functions to get joint distribution of subsidies over (z*,eps) ####

# Compute subsidy with DRS technology 
compute_subsidy_DRS_eps_model_joint <- function(parameters, n_sampling = 10000, 
                                                z_star_NC = z_star_NC_baseline, z_star_grid_dense = z_star_grid_dense_baseline, 
                                                z_star_grid_dense_long = z_star_grid_dense_long_baseline, 
                                                z_tilde_grid_dense_long = z_tilde_grid_dense_long_baseline, 
                                                z_star_grid_dense_marginal_distrib_long = z_star_grid_dense_marginal_distrib_long_baseline,
                                                revenue_NC_z_star_grid_dense_long = revenue_NC_z_star_grid_dense_long_baseline,
                                                elasticity_ratio = elasticity_ratio_baseline, eta_tilde = eta_tilde_baseline, n_connect = nrow_connected,
                                                empirical_QR = empirical_QR_baseline){
  
  ##### Step 0: Prepare data given parameters #####
  
  print(parameters)
  
  # get fixed cost into right level 
  fixed_cost <- fixed_cost_profit_normalization*parameters["fixed_cost"]
  
  ### get good range for epsilon (simulating min and max)
  
  # Maximum (already take into account that correlation will be negative!!) 
  set.seed(12345)
  min_z_star <- unname(quantile(x = z_star_NC, probs = c(0.025))) # take 2.5% percentile
  random_draw_eps_max <- rnorm(
    n = 50000, 
    mean = parameters["alpha_eps"] + parameters["beta_eps"]*log(min_z_star),
    sd = sqrt(parameters["variance_eps_z"]))
  eps_max <- unname(quantile(x = random_draw_eps_max, probs = c(0.975))) # take 97.5% percentile 
  
  # Minimum 
  set.seed(12345)
  max_z_star <- unname(quantile(x = z_star_NC, probs = c(0.975))) # take 97.5% percentile
  random_draw_eps_min <- rnorm(
    n = 50000, 
    mean = parameters["alpha_eps"] + parameters["beta_eps"]*log(max_z_star),
    sd = sqrt(parameters["variance_eps_z"]))
  eps_min <- max(1e-32,unname(quantile(x = random_draw_eps_min, probs = c(0.025))))
  
  #
  if(eps_min > eps_max){
    print("Warning: eps min > eps max, will set eps max larger")
    eps_max <- 2*eps_min
  }
  
  # create long dense epsilon grid 
  epsilon_grid_dense_long <- (crossing(
    z_star = z_star_grid_dense,
    epsilon = seq(
      from = eps_min,
      to = eps_max, 
      length.out = grid_dense_length)))$epsilon 
  
  ##### Step 1: Get probability distribution over states #####
  
  # Create df
  results_df <- tibble(
    z_star = z_star_grid_dense_long,
    z_tilde = z_tilde_grid_dense_long, 
    epsilon = epsilon_grid_dense_long,
    z_star_marginal_distrib = z_star_grid_dense_marginal_distrib_long, 
    epsilon_condit_distrib = dnorm(
      x = epsilon,
      mean = parameters["alpha_eps"] + parameters["beta_eps"]*log(z_star),
      sd = sqrt(parameters["variance_eps_z"]))
  )
  
  # Get joint density for each state
  results_df <- results_df %>% 
    group_by(z_star) %>% 
    mutate(
      # first normalize within z_star
      epsilon_condit_distrib = epsilon_condit_distrib/sum(epsilon_condit_distrib)
    ) %>% ungroup() %>% 
    mutate(
      # then get joint density: product of marginal & condit distrib  
      joint_density = epsilon_condit_distrib*z_star_marginal_distrib, 
      # and make sure its normalized to 1 
      joint_density = joint_density/sum(joint_density)
    )
  
  ## Next, draw from joint densities to reduce dimensionality 
  
  # draw representative sub-sample from state space 
  set.seed(12345) 
  sampled_entries <- sample(
    x = c(1:nrow(results_df)), 
    size = n_sampling, replace = T, 
    prob = results_df$joint_density)
  
  # restrict results to sampled entries (to reduce dimensionality)
  results_df <- results_df[sampled_entries,]
  
  ##### Step 2: Fill (restricted) results df with optimal choices  #####
  
  # Directly estimate results for restricted results_df sample 
  results_df <- compute_subsidy_DRS_eps_model_grid(
    z_star_grid = results_df$z_star, 
    epsilon_grid = results_df$epsilon, 
    z_tilde_grid = results_df$z_tilde, 
    theta = parameters["theta"])
  
  # fill in rest 
  results_df <- results_df %>% 
    mutate(
      # update based on fixed cost & selection into connections
      choose_C = optimal_profits_C - fixed_cost > optimal_profits_NC,
      optimal_subsidy = ifelse(choose_C,optimal_subsidy,0.0),
      optimal_m_R = ifelse(choose_C, optimal_m_R, 0.0),
      optimal_revenue = ifelse(choose_C, optimal_revenue, revenue_NC_z_star_grid_dense_long[sampled_entries]),
      optimal_profits = ifelse(choose_C, optimal_profits_C - (1-profit_tax)*fixed_cost, optimal_profits_NC),
      TFPQ_model = (1+optimal_subsidy)*z_star
    )
  
  ##### Step 3: Get results, TFPQ & rent-seeking distribution over connected firms #####
  
  # restrict to firms that would choose to become connected 
  connected_df <- results_df[results_df$choose_C,]
  
  # Build quantiles of TFPR distribution of connected firms in data 
  connected_df <- connected_df %>% arrange(TFPQ_model)
  
  quantiles_connected_TFPQ_model <- unname(quantile(
    x = connected_df$TFPQ_model,
    probs = c(1:n_connect)/(n_connect+1) 
  ))
  
  ##### Step 4: Build & return objective #####
  
  # Compute Quantile ratio in Model 
  model_QR <- (compute_baseline_TFPQ_QR(
    data_connect = tibble(
      index = c(1:n_connect),
      TFPQ_resid = quantiles_connected_TFPQ_model), 
    data_nonconnect = results_df %>% rename(TFPQ_resid = z_star), 
    cutoff = 0.0))$TFPQ_QR
  
  # evaluate objective over quantiles of TFPQ
  objective_QR_moment <- sum( (empirical_QR - model_QR)^2 )
  objective_QR_moment_norm <- objective_QR_moment/(sum( (mean(empirical_QR) - empirical_QR)^2 ))
  
  # Also compute implied probability of becoming connected 
  proba_C <- sum(results_df$choose_C)/n_sampling
  
  # return 
  return(list(objective = objective_QR_moment_norm, 
              results_df = results_df, 
              quantiles_model = quantiles_connected_TFPQ_model, 
              proba_C = proba_C
  ))
}

# Compute subsidy with DRS + costs technology (Baseline!)
compute_subsidy_DRS_eps_cost_model_joint <- function(parameters, n_sampling = 10000, 
                                                     z_star_NC = z_star_NC_baseline, z_star_grid_dense = z_star_grid_dense_baseline, 
                                                     z_star_grid_dense_long = z_star_grid_dense_long_baseline, 
                                                     z_tilde_grid_dense_long = z_tilde_grid_dense_long_baseline, 
                                                     z_star_grid_dense_marginal_distrib_long = z_star_grid_dense_marginal_distrib_long_baseline,
                                                     revenue_NC_z_star_grid_dense_long = revenue_NC_z_star_grid_dense_long_baseline,
                                                     elasticity_ratio = elasticity_ratio_baseline, eta_tilde = eta_tilde_baseline, n_connect = nrow_connected,
                                                     empirical_QR = empirical_QR_baseline){
  
  ##### Step 0: Prepare data given parameters #####
  
  print(parameters)
  
  # get fixed cost into right level 
  fixed_cost <- fixed_cost_profit_normalization*parameters["fixed_cost"]
  
  ### get good range for epsilon (simulating min and max)
  
  # Maximum (already take into account that correlation will be negative!!) 
  set.seed(12345)
  min_z_star <- unname(quantile(x = z_star_NC, probs = c(0.025))) # take 2.5% percentile
  random_draw_eps_max <- rnorm(
    n = 50000, 
    mean = parameters["alpha_eps"] + parameters["beta_eps"]*log(min_z_star),
    sd = sqrt(parameters["variance_eps_z"]))
  eps_max <- unname(quantile(x = random_draw_eps_max, probs = c(0.975))) # take 97.5% percentile 
  
  # Minimum 
  set.seed(12345)
  max_z_star <- unname(quantile(x = z_star_NC, probs = c(0.975))) # take 97.5% percentile
  random_draw_eps_min <- rnorm(
    n = 50000, 
    mean = parameters["alpha_eps"] + parameters["beta_eps"]*log(max_z_star),
    sd = sqrt(parameters["variance_eps_z"]))
  eps_min <- max(1e-32,unname(quantile(x = random_draw_eps_min, probs = c(0.025))))
  
  #
  if(eps_min > eps_max){
    print("Warning: eps min > eps max, will set eps max larger")
    eps_max <- 2*eps_min
  }
  
  # create long dense epsilon grid 
  epsilon_grid_dense_long <- (crossing(
    z_star = z_star_grid_dense,
    epsilon = seq(
      from = eps_min,
      to = eps_max, 
      length.out = grid_dense_length)))$epsilon 
  
  ##### Step 1: Get probability distribution over states #####
  
  # Create df
  results_df <- tibble(
    z_star = z_star_grid_dense_long,
    z_tilde = z_tilde_grid_dense_long, 
    epsilon = epsilon_grid_dense_long,
    z_star_marginal_distrib = z_star_grid_dense_marginal_distrib_long, 
    epsilon_condit_distrib = dnorm(
      x = epsilon,
      mean = parameters["alpha_eps"] + parameters["beta_eps"]*log(z_star),
      sd = sqrt(parameters["variance_eps_z"]))
  )
  
  # Get joint density for each state
  results_df <- results_df %>% 
    group_by(z_star) %>% 
    mutate(
      # first normalize within z_star
      epsilon_condit_distrib = epsilon_condit_distrib/sum(epsilon_condit_distrib),
      # make sure that if all entries are 0 within z-star, then set to equal weight, not NaN 
      epsilon_condit_distrib = ifelse(is.nan(epsilon_condit_distrib), 1/n(), epsilon_condit_distrib)
    ) %>% ungroup() %>% 
    mutate(
      # then get joint density: product of marginal & condit distrib  
      joint_density = epsilon_condit_distrib*z_star_marginal_distrib, 
      # and make sure its normalized to 1 
      joint_density = joint_density/sum(joint_density)
    )
  
  ## Next, draw from joint densities to reduce dimensionality 
  
  # draw representative sub-sample from state space 
  set.seed(12345) 
  sampled_entries <- sample(
    x = c(1:nrow(results_df)), 
    size = n_sampling, replace = T, 
    prob = results_df$joint_density)
  
  # restrict results to sampled entries (to reduce dimensionality)
  results_df <- results_df[sampled_entries,]
  
  ##### Step 2: Fill (restricted) results df with optimal choices  #####
  
  # Directly estimate results for restricted results_df sample 
  results_df <- compute_subsidy_DRS_eps_cost_model_grid(
    z_star_grid = results_df$z_star, 
    epsilon_grid = results_df$epsilon, 
    z_tilde_grid = results_df$z_tilde, 
    parameters = parameters, 
    elasticity_ratio = elasticity_ratio, 
    eta_tilde = eta_tilde)
  
  ##### Step 3: Get results, TFPQ & rent-seeking distribution over connected firms #####
  
  # restrict to firms that would choose to become connected 
  connected_df <- results_df[results_df$choose_C,]
  
  # Build quantiles of TFPR distribution of connected firms in data 
  connected_df <- connected_df %>% arrange(TFPQ_model)
  
  quantiles_connected_TFPQ_model <- unname(quantile(
    x = connected_df$TFPQ_model,
    probs = c(1:n_connect)/(n_connect+1) 
  ))
  
  ##### Step 4: Build & return objective #####
  
  # Compute Quantile ratio in Model 
  model_QR <- (compute_baseline_TFPQ_QR(
    data_connect = tibble(
      index = c(1:n_connect),
      TFPQ_resid = quantiles_connected_TFPQ_model), 
    data_nonconnect = results_df %>% rename(TFPQ_resid = z_star), 
    cutoff = 0.0))$TFPQ_QR
  
  # evaluate objective over quantiles of TFPQ
  objective_QR_moment <- sum( (empirical_QR - model_QR)^2 )
  objective_QR_moment_norm <- objective_QR_moment/(sum( (mean(empirical_QR) - empirical_QR)^2 ))
  
  # Also compute implied probability of becoming connected 
  proba_C <- sum(results_df$choose_C)/n_sampling
  
  # return 
  return(list(objective = objective_QR_moment_norm, 
              results_df = results_df, 
              quantiles_model = quantiles_connected_TFPQ_model, 
              proba_C = proba_C
  ))
}

#### Find optimal DRS parameters #### 

# start with initial guess 
guess_param_DRS <- c(
  "fixed_cost" = 0.0,
  "theta" = 1 - eta_tilde_baseline, # (limit is 1-eta_tilde)
  "alpha_eps" = 0.185, 
  "beta_eps" = -0.05, 
  "variance_eps_z" = 0.0001125)

# check that get results given guess 
test_DRS_eps <- compute_subsidy_DRS_eps_model_joint(parameters = guess_param_DRS, n_sampling = 10000)

## Objective works! 
test_DRS_eps$objective

# optimal subsidies
summary(test_DRS_eps$results_df$optimal_subsidy)

# test that marginal distribution of z* is indeed the same!! 
ggplot() + 
  geom_density(aes(x = log(test_DRS_eps$results_df$z_star), color = "Model")) + 
  geom_density(aes(x = log(z_star_NC_baseline), color = "Data"))

## Solve for optimal parameters (previous grid search showed that higher theta is better, so take maximum allowed by theory = 1 - eta_tilde. Need to add a bit of gap, otherwise not stable)

# get initial parameter guess 
initial_parameter_DRS_eps <- c(
  "alpha_eps" = 0.185, 
  "beta_eps" = -0.05,
  "variance_eps_z" = 0.0001125,
  "theta" = 0.155)

### Find parameters using gradient-based "L-BFGS-B" (box-constrained Newton method)

## Start optim parallel (to run in parallel)
cl <- makeCluster(detectCores() - 2, type="FORK")
setDefaultCluster(cl = cl)

# Find optimal parameters (starting from good initial guess & relatively tight bounds!)
optimal_DRS_eps <- optimParallel( 
  par = initial_parameter_DRS_eps/initial_parameter_DRS_eps, 
  fn = function(par){
    parameters <- par*initial_parameter_DRS_eps
    
    # check parameter combinations 
    if(mean(parameters["alpha_eps"] + parameters["beta_eps"]*log(z_star_NC_baseline)) < 0){
      return(1e12*(1/parameters["alpha_eps"]))
    } else{
      results <- compute_subsidy_DRS_eps_model_joint(
        parameters = c(parameters,"fixed_cost" = 0.0,
                       "theta" = 0.155),
        n_sampling = 5000)
      print(paste("Objective is: ", results$objective))
      return(results$objective)
    }
  },
  method = "L-BFGS-B",
  lower = c(0.5,0.5,0.5,0.9), 
  upper = c(2.0,2.0,2.0,0.158/0.155), 
  control = list(factr = 1e8) 
)

stopCluster(cl) 

# check results 
optimal_DRS_eps$par*initial_parameter_DRS_eps
optimal_DRS_eps$value # R2 = 0.496%, obj = 0.504
optimal_DRS_eps$message # converged! 

# implied rho? -0.976 (also: almost perfect negative correlation) 
ratio_beta_sigma_DRS <- unname(((optimal_DRS_eps$par*initial_parameter_DRS_eps)["beta_eps"])/sqrt((optimal_DRS_eps$par*initial_parameter_DRS_eps)["variance_eps_z"]))
ratio_beta_sigma_DRS/sqrt((ratio_beta_sigma_DRS)^2 + 1)

# now find these results 
results_optimal_DRS_eps <- compute_subsidy_DRS_eps_model_joint(
  parameters = c(optimal_DRS_eps$par*initial_parameter_DRS_eps,"fixed_cost" = 0.0), 
  n_sampling = 50000)

ggplot() + 
  geom_line(aes(x = c(1:nrow_connected)/(nrow_connected+1), 
                y = quantiles_connected_TFPQ_data_baseline, 
                color = "Data")) + 
  geom_line(aes(x = c(1:nrow_connected)/(nrow_connected+1), 
                y = results_optimal_DRS_eps$quantiles_model, 
                color = "Model (test)")) +
  ylab("TFPQ (connected)") + 
  xlab("Quantile")

ggplot() + 
  geom_density(aes(x = quantiles_connected_TFPQ_data_baseline, fill = "Data"), alpha = 0.5) + 
  geom_density(aes(x = results_optimal_DRS_eps$quantiles_model, fill = "Model"), alpha = 0.5) + 
  theme_classic() + 
  ylab("Density") + 
  xlab("TFPQ")

# compute TFPQ quantile ratio in model 
model_DRS_TFPQ_QR_noF <- compute_baseline_TFPQ_QR(
  data_connect = tibble(
    index = c(1:241),
    TFPQ_resid = results_optimal_DRS_eps$quantiles_model), 
  data_nonconnect = results_optimal_DRS_eps$results_df %>% 
    rename(TFPQ_resid = z_star), 
  cutoff = 0.0)

# Plot estimated TFPQ QR schedule  
ggplot() + 
  geom_line(data = CI_baseline_TFPQ_QR_bs_upper, aes(x = productivity_order, y = TFPQ_QR, group = Percentiles), 
            alpha = 0.5, linetype = "dashed") + 
  geom_point(aes(x = c(1:241), y = baseline_TFPQ_QR_noF$TFPQ_QR, color = "Data"), shape = 4) + 
  geom_line(aes(x = c(1:241), y = model_DRS_TFPQ_QR_noF$TFPQ_QR, color = "Model"), linewidth = 1.0) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "TFPQ Quantile Ratio") + 
  xlab(label = "TFPQ Rank") + 
  scale_color_discrete(name = "Type:") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  theme(legend.position = "bottom")

#### Find optimal DRS+convex parameters #### 

# start with initial guess 
guess_param_DRS_convex <- c(
  "fixed_cost" = 0.0, 
  "theta" = 0.2, 
  "alpha_eps" = 0.041, 
  "beta_eps" = -0.011, 
  "variance_eps_z" = 4.656e-07, 
  "cost_level" = 1e-08, 
  "cost_elasticity" = 1.04)

# check that get results given guess 
test_DRS_eps_cost <- compute_subsidy_DRS_eps_cost_model_joint(parameters = guess_param_DRS_convex, n_sampling = 10000)

## Objective seems to work! 
test_DRS_eps_cost$objective

# optimal subsidies
summary(test_DRS_eps_cost$results_df$optimal_subsidy)

# test that marginal distribution of z* is indeed the same!! 
ggplot() + 
  geom_density(aes(x = log(test_DRS_eps_cost$results_df$z_star), color = "Model")) + 
  geom_density(aes(x = log(z_star_NC_baseline), color = "Data"))

### Algorithm: Gradient-based method, but fix elasticities (grid-search showed that these give the best fit) 

# then solve for optimal parameters 
initial_parameter_DRS_eps_cost <- c(
  "alpha_eps" = 0.041, 
  "beta_eps" = -0.011, 
  "variance_eps_z" = 4.656e-07, 
  "cost_level" = 1e-08, 
  "theta" = 0.2,
  "cost_elasticity" = 1.04)

## Start optim parallel 
cl <- makeCluster(detectCores() - 2, type="FORK")
setDefaultCluster(cl = cl)

# Find optimal parameters (starting from good initial guess & relatively tight bounds!)
optimal_DRS_eps_cost <- optimParallel( 
  par = initial_parameter_DRS_eps_cost/initial_parameter_DRS_eps_cost, 
  fn = function(par){
    parameters <- par*initial_parameter_DRS_eps_cost
    
    # check parameter combinations 
    if(mean(parameters["alpha_eps"] + parameters["beta_eps"]*log(z_star_NC_baseline)) < 0.01){
      return(1e12*(1/parameters["alpha_eps"]))
    } else{
      results <- compute_subsidy_DRS_eps_cost_model_joint(
        parameters = c(parameters,"fixed_cost" = 0.0), 
        n_sampling = 10000)
      print(paste("Objective is: ", results$objective))
      return(results$objective)
    }
  },
  method = "L-BFGS-B",
  lower = c(0.5,0.5,0.5,0.5,1.0,1/1.04), 
  upper = c(2.0,2.0,2.0,2.0,1.0,1.1) 
)

stopCluster(cl) 

optimal_DRS_eps_cost$par*initial_parameter_DRS_eps_cost
optimal_DRS_eps_cost$value # R2 = 0.855, obj = 0.145
optimal_DRS_eps_cost$message # Converged! 

### Solve for remaining parameters ### 

# solve for correlation rho: -0.999
ratio_beta_sigma <- ((optimal_DRS_eps_cost$par*initial_parameter_DRS_eps_cost)["beta_eps"])/sqrt((optimal_DRS_eps_cost$par*initial_parameter_DRS_eps_cost)["variance_eps_z"])
baseline_estimate_rho <- ratio_beta_sigma/sqrt((ratio_beta_sigma)^2 + 1)

# save parameters 
baseline_parameters <- c(
  optimal_DRS_eps_cost$par*initial_parameter_DRS_eps_cost,
  "fixed_cost" = 0.0, 
  "proba_c" = nrow_connected/(nrow_connected+nrow_non_connected), 
  "rho" = unname(baseline_estimate_rho)
)

# also save as dataframe (to read in Julia)
baseline_parameters_df <- tibble(
  "alpha_eps" = (optimal_DRS_eps_cost$par*initial_parameter_DRS_eps_cost)[[1]],
  "beta_eps" = (optimal_DRS_eps_cost$par*initial_parameter_DRS_eps_cost)[[2]],
  "variance_eps_z" = (optimal_DRS_eps_cost$par*initial_parameter_DRS_eps_cost)[[3]],
  "cost_level" = (optimal_DRS_eps_cost$par*initial_parameter_DRS_eps_cost)[[4]],
  "fixed_cost" = 0.0, 
  "theta" = (optimal_DRS_eps_cost$par*initial_parameter_DRS_eps_cost)[[5]], 
  "cost_elasticity" = (optimal_DRS_eps_cost$par*initial_parameter_DRS_eps_cost)[[6]], 
  "proba_c" = nrow_connected/(nrow_connected+nrow_non_connected), 
  "rho" = unname(baseline_estimate_rho)
)
# write_csv(x = baseline_parameters_df, file = "Computation/data/baseline_parameters.csv")

# now find these results 
results_optimal_DRS_eps_cost <- compute_subsidy_DRS_eps_cost_model_joint(
  parameters = c(optimal_DRS_eps_cost$par*initial_parameter_DRS_eps_cost,
                 "fixed_cost" = 0.0), 
  n_sampling = 50000)

ggplot() + 
  geom_line(aes(x = c(1:nrow_connected)/(nrow_connected+1), 
                y = quantiles_connected_TFPQ_data_baseline, 
                color = "Data")) + 
  geom_line(aes(x = c(1:nrow_connected)/(nrow_connected+1), 
                y = results_optimal_DRS_eps_cost$quantiles_model, 
                color = "Model (test)")) +
  ylab("TFPQ (connected)") + 
  xlab("Quantile")

ggplot() + 
  geom_density(aes(x = quantiles_connected_TFPQ_data_baseline, fill = "Data"), alpha = 0.5) + 
  geom_density(aes(x = results_optimal_DRS_eps_cost$quantiles_model, fill = "Model"), alpha = 0.5) + 
  theme_classic() + 
  ylab("Density") + 
  xlab("TFPQ")

# compute TFPQ quantile ratio in model 
model_DRS_convex_TFPQ_QR_noF <- compute_baseline_TFPQ_QR(
  data_connect = tibble(
    index = c(1:241),
    TFPQ_resid = results_optimal_DRS_eps_cost$quantiles_model), 
  data_nonconnect = results_optimal_DRS_eps_cost$results_df %>% 
    rename(TFPQ_resid = z_star), 
  cutoff = 0.0)

# Plot estimated TFPQ QR schedule  
ggplot() + 
  geom_line(data = CI_baseline_TFPQ_QR_bs_upper, aes(x = productivity_order, y = TFPQ_QR, group = Percentiles), 
            alpha = 0.5, linetype = "dashed") + 
  geom_point(aes(x = c(1:241), y = baseline_TFPQ_QR_noF$TFPQ_QR, color = "Data"), shape = 4) + 
  geom_line(aes(x = c(1:241), y = model_DRS_convex_TFPQ_QR_noF$TFPQ_QR, color = "Model"), linewidth = 1.0) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "TFPQ Quantile Ratio") + 
  xlab(label = "TFPQ Rank") + 
  scale_color_discrete(name = "Type:") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  theme(legend.position = "bottom")

## Check implied (differential) intermediate input share 
model_implied_interm_inputs <- results_optimal_DRS_eps_cost$results_df %>% 
  mutate(
    intermediate_share = (optimal_m_R + gamma_gross*optimal_revenue)/optimal_revenue, 
    diff_intermediate_share = optimal_m_R/optimal_revenue
  ) 
mean(model_implied_interm_inputs$diff_intermediate_share) # 4pp

#### Plot results ####

# get data moments 
main_results_df <- firms_1997 %>% select(TFPQ_resid,connected) %>% 
  mutate(Type = "Data") %>% 
  rename(TFPQ = TFPQ_resid)

# add model moments
main_results_df <- rbind(
  main_results_df, 
  results_optimal_DRS_eps_cost$results_df %>% 
    filter(choose_C) %>% 
    select(TFPQ_model) %>% 
    mutate(connected = "connected", Type = "Model") %>% 
    rename(TFPQ = TFPQ_model),
  results_optimal_DRS_eps_cost$results_df %>% 
    select(z_star) %>% 
    mutate(connected = "non-connected", Type = "Model") %>% 
    rename(TFPQ = z_star)
)

## Figure 3 (left panel): TFPQ distributions ##

plot_TFPQ_distrib_model <- ggplot(
  main_results_df %>% 
    mutate(connected = case_when(
      connected == "connected" ~ "C", 
      connected == "non-connected" ~ "NC"
    )),  
  aes(x = log(TFPQ), y = connected, color = Type, fill = Type)) +
  geom_density_ridges(
    alpha = 0.6, color = "black", quantile_lines = TRUE, quantiles = 2,
    show.legend = TRUE, bandwidth = 0.075) +
  scale_y_discrete(name = "") +
  scale_fill_manual(values = c("#e74c3c","#3498db"), labels = c("Data", "Model (Baseline)"), name = "Type:") + # "#F9918A","#33CCD0" c("#D55E0050", "#0072B250")
  theme_classic(base_size = 16, base_family = "LM Roman 10") +
  theme(legend.position = "bottom") +
  xlab("log TFPQ")

## Figure 3 (right panel): TFPQ QR schedule ##

# Plot estimated TFPQ QR schedule  
plot_TFPQ_QR_model <- ggplot() + 
  geom_line(data = CI_baseline_TFPQ_QR_bs_upper, aes(x = (productivity_order/241)*100, y = TFPQ_QR, group = Percentiles), 
            alpha = 0.5, linetype = "dashed") + 
  geom_point(aes(x = (c(1:241)/241)*100, y = baseline_TFPQ_QR_noF$TFPQ_QR, color = "Data"), shape = 4) + 
  geom_line(aes(x = (c(1:241)/241)*100, y = model_DRS_TFPQ_QR_noF$TFPQ_QR, color = "Model (DRS)"), linewidth = 0.5) + 
  geom_line(aes(x = (c(1:241)/241)*100, y = model_DRS_convex_TFPQ_QR_noF$TFPQ_QR, color = "Model (Baseline)"), linewidth = 1.2) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "TFPQ Quantile Ratio") + 
  xlab(label = "TFPQ Percentiles") + 
  scale_color_manual(values = c("#e74c3c","#3498db","#6A6D6D"), name = "Type:") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  theme(legend.position = "bottom")

ggarrange(
  plot_TFPQ_distrib_model + labs(caption="(A) TFPQ distributions C vs NC ") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))), 
  plot_TFPQ_QR_model + labs(caption="(B) TFPQ quantile ratio") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))),
  ncol = 2, nrow = 1)  
ggsave(width = 12, height = 5, dpi = 200,
       "R/output/Figure3.pdf") 

################ Baseline results Productivity Process ################

#### Data moments ####

# In data: select non-connected firms in 1997 that survive until 1998. 
# Get their z* (pre) and z* (post)
# In data, run regression of log(TFPQ post) on log(TFPQ pre) & collect regression coefficients 
lm_prod_process <- lm(formula = log_z_star_next_NC_survivors ~ log_z_star_NC_survivors)

# Moments are 2 coefficients & variance 
lm_prod_process$coefficients
var(lm_prod_process$residuals)

#### Model & solve for optimal parameters ####

# Write objective function for estimating productivity process 
objective_productivity_process <- function(
    productivity_param,
    parameters = baseline_parameters){
  
  ##### Step 0: Prepare data given parameters (can move this out!) #####
  
  print(productivity_param)
  
  # extract parameters
  rho_z <- productivity_param["rho_z"]
  mean_zeta_star <- productivity_param["mean_zeta_star"]
  var_zeta_star <- productivity_param["var_zeta_star"]
  
  ##### Step 1: Update marginal productivity distribution of survivors given productivity_param #####
  
  # draw new log_z_star (could also have many more rnorm draws!) 
  set.seed(12345)
  log_z_star_new <- rho_z*log_z_star_NC_survivors + rnorm(n = length(log_z_star_NC_survivors), mean = mean_zeta_star, sd = sqrt(var_zeta_star))
  
  ##### Step 2: Draw new epsilon based on new z_star #####
  
  # Draw new epsilon based on new z_star 
  set.seed(12345)
  new_epsilon <- rnorm(n = length(log_z_star_NC_survivors), 
                       mean = parameters["alpha_eps"] + parameters["beta_eps"]*log_z_star_new, 
                       sd = sqrt(parameters["variance_eps_z"])
  )
  
  ##### Step 3: Solve for optimal policies/subsidies ##### 
  
  results_df <- compute_subsidy_DRS_eps_cost_model_grid(
    z_star_grid = exp(log_z_star_new), 
    epsilon_grid = new_epsilon, 
    z_tilde_grid = z_tilde_baseline(z_star = exp(log_z_star_new)), 
    parameters = parameters)
  
  ##### Step 4: Construct objective ##### 
  
  # create regression dataset
  reg_df <- rbind(
    # first dataset only non-connected 
    results_df %>% select(z_star) %>% 
      mutate(weight = 1 - unname(parameters["proba_c"]),
             TFPQ_prev = exp(log_z_star_NC_survivors)) %>%  
      rename(TFPQ = z_star), 
    # second dataset only connected 
    results_df %>% select(TFPQ_model) %>% 
      mutate(weight = unname(parameters["proba_c"]), 
             TFPQ_prev = exp(log_z_star_NC_survivors)) %>%  
      rename(TFPQ = TFPQ_model)
  )
  
  # Now run regression 
  main_regression <- lm(formula = log(TFPQ) ~ log(TFPQ_prev), data = reg_df, weights = reg_df$weight)
  
  # construct objective 
  error_constant <- abs(main_regression$coefficients[[1]] - lm_prod_process$coefficients[[1]])/lm_prod_process$coefficients[[1]]
  error_rho <- abs(main_regression$coefficients[[2]] - lm_prod_process$coefficients[[2]])/lm_prod_process$coefficients[[2]]
  error_var <- abs(matrixStats::weightedVar(x = main_regression$residuals, w = reg_df$weight) - var(lm_prod_process$residuals))/var(lm_prod_process$residuals)
  
  # full objective is sum of errors 
  objective <- sum(error_constant^2 + error_rho^2 + error_var^2)
  
  # return 
  return(list(objective = objective, 
              moment1 = main_regression$coefficients[[1]], 
              moment2 = main_regression$coefficients[[2]], 
              moment3 = matrixStats::weightedVar(x = main_regression$residuals, w = reg_df$weight)))
}

initial_param_guess_prod_process <- c(
  "rho_z" = lm_prod_process$coefficients[[2]], 
  "mean_zeta_star" = lm_prod_process$coefficients[[1]], 
  "var_zeta_star" = var(lm_prod_process$residuals)
) 

# test 
objective_productivity_process(productivity_param = initial_param_guess_prod_process)

## Start optim parallel 
cl <- makeCluster(detectCores() - 2, type="FORK")
setDefaultCluster(cl = cl)

# Find optimal parameters (starting from good initial guess & relatively tight bounds!)
optimal_param_prod_process <- optimParallel( 
  par = initial_param_guess_prod_process, 
  fn = function(par){
    result <- objective_productivity_process(productivity_param = par)
    print(paste("Objective is: ", result$objective))
    return(result$objective)
  },
  method = "L-BFGS-B",
  lower = c(0.9,0.06,0.01), 
  upper = c(1.0,0.15,0.03)
)

stopCluster(cl) 

optimal_param_prod_process$par
optimal_param_prod_process$value # objective < 1e-11
optimal_param_prod_process$message # converged 

# solve these 
optimal_param_prod_process_results <- objective_productivity_process(productivity_param = optimal_param_prod_process$par)

# Use these to add to paper & Julia code 

################ Baseline results Exit/Entry processes ################

#### Step 1: Discretize z and get transition matrix over z ####

# from Tauchen -- could also use Rouwenhorst!
library(Rtauchen)

# get transition matrix 
transition_z <- Rtauchen(
  ne = 1000, 
  ssigma_eps = sqrt(optimal_param_prod_process$par[[3]]), 
  llambda_eps = optimal_param_prod_process$par[[1]], 
  m = 2.5)
# write_csv(x = as_tibble(transition_z), file = "Computation/data/transition_matrix_z.csv")

# get corresponding grid (and make sure mean is correct!) 
mean_uncond_log_z_star <- (optimal_param_prod_process$par[[2]]/(1-optimal_param_prod_process$par[[1]]))
log_z_star_grid_EE <- as.vector(Tgrid(
  ne = 1000, 
  ssigma_eps = sqrt(optimal_param_prod_process$par[[3]]), 
  llambda_eps = optimal_param_prod_process$par[[1]], 
  m = 2.5) + mean_uncond_log_z_star)

z_star_grid_EE <- exp(log_z_star_grid_EE)
# write_csv(x = tibble(z_star = z_star_grid_EE), file = "Computation/data/z_star_grid.csv")

# test that this gives correct numbers: looks good! 

test_prod_process <- tibble(log_z_star = log_z_star_grid_EE,
                            log_z_star_next = NA)
for(row in c(1:nrow(test_prod_process))){
  test_prod_process$log_z_star_next[row] <- sample(x = log_z_star_grid_EE, size = 1, replace = T, prob = transition_z[row,])
}

test_lm_prod_process <- lm(data = test_prod_process, formula = log_z_star_next ~ log_z_star)
summary(test_lm_prod_process) # coefficients in line with mean & persistence 
sd(test_lm_prod_process$residuals) # sd of residual also in line
rm(test_lm_prod_process)
rm(test_prod_process)

#### Step 2: For each z, solve for optimal profits including for grid over epsilon ####

# create array to save (expected) profits conditional on z (both for NC & C)
profits_grid_EE <- tibble(
  z_star = z_star_grid_EE,
  z_tilde = z_tilde_baseline(z_star = z_star_grid_EE), 
  profits_C_expected = rep(NA, length(z_star_grid_EE)),
  profits_NC = rep(NA, length(z_star_grid_EE))
)

get_expected_profits_C_EE <- function(row, n_eps){ 
  
  ### Substep 1: Draw epsilon conditional on z_star & the probability 
  set.seed(12345)
  epsilon <- rnorm(
    n = n_eps, 
    mean = baseline_parameters[["alpha_eps"]] + baseline_parameters[["beta_eps"]]*log_z_star_grid_EE[row], 
    sd = sqrt(baseline_parameters[["variance_eps_z"]])
  )
  
  proba_epsilon <- dnorm(x = epsilon, 
                         mean = baseline_parameters[["alpha_eps"]] + baseline_parameters[["beta_eps"]]*log_z_star_grid_EE[row], 
                         sd = sqrt(baseline_parameters[["variance_eps_z"]]))
  
  # normalize probability to make sure it sums to 1  
  proba_epsilon <- proba_epsilon/sum(proba_epsilon)
  
  ### Substep 2: Get optimal profits 
  
  results_df <- compute_subsidy_DRS_eps_cost_model_grid(
    z_star_grid = rep(z_star_grid_EE[row], n_eps), 
    epsilon_grid = epsilon, 
    z_tilde_grid = rep(profits_grid_EE$z_tilde[row], n_eps),
    parameters = baseline_parameters)
  
  # get expected profits by taking weighted mean across all possible epsilon 
  expected_profits_C <- sum(results_df$optimal_profits_C*proba_epsilon)
  expected_profits_NC <- sum(results_df$optimal_profits_NC*proba_epsilon)
  
  # collect further objects needed for later 
  expected_m_R_C <- sum(results_df$optimal_m_R*proba_epsilon)
  expected_subsidies_C <- sum((results_df$optimal_revenue)*(results_df$optimal_subsidy/(1 + results_df$optimal_subsidy))*proba_epsilon)
  # add VAT revenue here 
  # add profit tax revenue here 
  expected_revenue_output_C <- sum(results_df$revenue_output_C*proba_epsilon)
  expected_revenue_output_NC <- sum(results_df$revenue_output_NC*proba_epsilon)
  expected_subsidy_rate_C <- sum(results_df$optimal_subsidy*proba_epsilon)
  expected_revenue_C <- sum(results_df$optimal_revenue*proba_epsilon)
  expected_revenue_NC <- sum(results_df$z_tilde*proba_epsilon)
  
  return(c("expected_profits_C" = expected_profits_C, 
           "expected_profits_NC" = expected_profits_NC, 
           "expected_m_R_C"= expected_m_R_C,
           "expected_subsidies_C" = expected_subsidies_C,
           "expected_revenue_output_C" = expected_revenue_output_C, 
           "expected_revenue_output_NC" = expected_revenue_output_NC, 
           "expected_subsidy_rate_C" = expected_subsidy_rate_C,
           "expected_revenue_C" = expected_revenue_C, 
           "expected_revenue_NC" = expected_revenue_NC))
}

# compute profits (takes a moment!) & other optimal choices 
profits_matrix_EE <- sapply(X = c(1:length(z_star_grid_EE)), FUN = function(x){get_expected_profits_C_EE(row = x, n_eps = 100)})
profits_grid_EE$profits_C_expected <- profits_matrix_EE[1,]
profits_grid_EE$profits_NC <- profits_matrix_EE[2,]
profits_grid_EE$profits_expected <- baseline_parameters[["proba_c"]]*profits_grid_EE$profits_C_expected + (1-baseline_parameters[["proba_c"]])*profits_grid_EE$profits_NC
profits_grid_EE$rent_seeking_C_expected <- profits_matrix_EE[3,]
profits_grid_EE$subsidies_expected <- baseline_parameters[["proba_c"]]*profits_matrix_EE[4,]
profits_grid_EE$revenue_output_expected <- baseline_parameters[["proba_c"]]*profits_matrix_EE[5,] + (1-baseline_parameters[["proba_c"]])*profits_matrix_EE[6,]
profits_grid_EE$revenue_C_expected <- profits_matrix_EE[8,]
profits_grid_EE$revenue_NC <- profits_matrix_EE[9,]
profits_grid_EE$revenue_expected <- baseline_parameters[["proba_c"]]*profits_grid_EE$revenue_C_expected + (1-baseline_parameters[["proba_c"]])*profits_grid_EE$revenue_NC
profits_grid_EE$subsidy_rate_C_expected <- profits_matrix_EE[7,]

# can export this 
# write_csv(x = profits_grid_EE, file = "Computation/data/profits_grid_baseline.csv")

#### Step 3: VFI: Solve expected VF over z conditional on guess for Gumbel parameters ####

# VFI function 
VFI_EE <- function(guess_exp_VF,param_EE,crit = 1e-6,verbose=T){
  
  ### Step 0: Prepare objects 
  
  # get parameters 
  level_fcost <- param_EE[["level_fcost"]]
  scale_fcost <- param_EE[["scale_fcost"]]
  
  # initialize objects 
  exp_VF_new <- guess_exp_VF
  exp_profits <- profits_grid_EE$profits_expected
  
  # create function to compute expected fixed costs 
  compute_exp_fcost <- function(exit_proba,exp_VF){
    
    # make sure that gamma incomplete is robust regarding "too small" values 
    gamma_incomplete <- case_when(
      # exit_proba too close to 1 
      -log(exit_proba) < 1e-250 ~ expint::gammainc(a = 0,x = 1e-250),
      # exit_proba too close to 0 
      exit_proba < 1e-250 ~ 0.0, 
      .default = expint::gammainc(a = 0,x = -log(exit_proba))
    )
    
    # compute expected fixed cost 
    exp_fcost <- beta*exp_VF*exit_proba - scale_fcost*gamma_incomplete
    
    return(exp_fcost)
  }
  
  ### Step 1: While loop
  iteration <- 1 
  VF_diff <- Inf 
  
  while (VF_diff > crit) {
    
    # print progress
    if(verbose){
      print(paste("Iteration",iteration,"with difference:",VF_diff))
    }
    
    # get exit probability given new guess 
    exit_proba <- 1 - exp(-exp(-(beta*exp_VF_new - level_fcost)/scale_fcost))
    exp_fcost <- compute_exp_fcost(exit_proba = exit_proba, exp_VF = exp_VF_new)
    
    # compute new VF (over all z)
    VF_new <- exp_profits + (1-exit_proba)*( -exp_fcost + beta*exp_VF_new )
    
    # save old results 
    exp_VF_old <- exp_VF_new
    
    # compute new expected VF 
    exp_VF_new <- transition_z %*% VF_new
    
    # update iteration & compute difference 
    iteration <- iteration + 1 
    VF_diff <- max(abs(exp_VF_old - exp_VF_new))
    
  }
  
  # update results
  exit_proba <- 1 - exp(-exp(-(beta*exp_VF_new - level_fcost)/scale_fcost))
  exp_fcost <- compute_exp_fcost(exit_proba = exit_proba, exp_VF = exp_VF_new)
  VF <- exp_profits + (1-exit_proba)*( -exp_fcost + beta*exp_VF_new )
  
  # Return results 
  return(list(exp_VF = exp_VF_new, exit_proba = exit_proba, VF = VF, exp_fcost = exp_fcost))
}

# Test it 
test_VFI_EE <- VFI_EE(
  guess_exp_VF = (1/(1-beta))*profits_grid_EE$profits_NC, 
  param_EE = c("level_fcost" = 2500000, "scale_fcost" = 10000))

summary(test_VFI_EE$exit_proba)

#### Step 4: Construct objective function ####

## First: Need to create data moment: exit proba over z
firms_1997 <- left_join(x = firms_1997, y = firms_exit %>% select(PSID,exit_rob))
reg_firms_NC_exit <- lm(data = firms_1997 %>% filter(connected == "non-connected"),
                        formula = exit_rob ~ log(TFPQ_resid))
summary(reg_firms_NC_exit)

# residualize exit
OLS_resid_exit <- fixest::feols(
  fml = exit_rob ~ 1 | prov + state_owned_major + state_owned_partly + estyear_FE + industry_4digit, #
  data = firms_1997) 

# get residualized exit measure 
firms_1997$exit_resid <- firms_1997$exit_rob - predict(object = OLS_resid_exit, newdata = firms_1997) + mean(firms_1997$exit_rob)

# Use this regression as target!! 
reg_firms_NC_exit_resid <- lm(
  data = firms_1997 %>% filter(connected == "non-connected"),
  formula = exit_resid ~ log(TFPQ_resid))
summary(reg_firms_NC_exit_resid)

## Second: Find distribution of NC in 1997 over z_grid 
closest_grid_points_z_star_EE <- sapply(X = z_star_NC_baseline, FUN = function(x){which.min(abs(x - z_star_grid_EE))})
empirical_distrib_z_star_NC_EE <- tibble(z_star_grid_point = closest_grid_points_z_star_EE) %>% 
  group_by(z_star_grid_point) %>% 
  summarise(share = n()/length(closest_grid_points_z_star_EE))
empirical_distrib_z_star_NC_EE <- left_join(
  x = tibble(z_star_grid_point = c(1:length(z_star_grid_EE)), 
             z_star = z_star_grid_EE), 
  y = empirical_distrib_z_star_NC_EE) 
empirical_distrib_z_star_NC_EE$share <- ifelse(is.na(empirical_distrib_z_star_NC_EE$share), 0.0, empirical_distrib_z_star_NC_EE$share)

## Third: Construct objective 
objective_exit_process <- function(param_EE){
  
  print(param_EE)
  
  # Step 1: Solve for VF given param_EE 
  results_VF <- VFI_EE(
    guess_exp_VF = (1/(1-beta))*profits_grid_EE$profits_NC, 
    param_EE = param_EE, verbose = F) 
  
  # Step 2: Run regression of exit proba on log(z_star)
  model_data <- empirical_distrib_z_star_NC_EE 
  model_data$exit_proba <- results_VF$exit_proba[,1]
  reg_model_firms_NC_exit <- lm(
    data = model_data,
    formula = exit_proba ~ log(z_star), 
    weights = share)
  
  # Step 3: Construct objective 
  moment_coef1 <- (reg_model_firms_NC_exit$coefficients[[1]] - reg_firms_NC_exit_resid$coefficients[[1]])/reg_firms_NC_exit_resid$coefficients[[1]]
  moment_coef2 <- (reg_model_firms_NC_exit$coefficients[[2]] - reg_firms_NC_exit_resid$coefficients[[2]])/reg_firms_NC_exit_resid$coefficients[[2]]
  
  print(paste("Constant is:",reg_model_firms_NC_exit$coefficients[[1]], 
              ". Slope is:", reg_model_firms_NC_exit$coefficients[[2]]))
  
  objective <- moment_coef1^2 + moment_coef2^2 
  return(list(objective = objective, results_VF = results_VF))
}

#### Step 5: Find optimal exit parameters ####

# start with good initial guess 
initial_param_guess_exit_process <- c("level_fcost" = -52300000*1.5, "scale_fcost" = 2.5e7*1.5)
objective_exit_process(param_EE = initial_param_guess_exit_process)

## Start optim parallel 
cl <- makeCluster(detectCores() - 2, type="FORK")
setDefaultCluster(cl = cl)

# Find optimal parameters (starting from good initial guess & relatively tight bounds!)
optimal_param_exit_process <- optimParallel( 
  par = initial_param_guess_exit_process/initial_param_guess_exit_process, 
  fn = function(par){
    parameter <- par*initial_param_guess_exit_process
    results <- objective_exit_process(param_EE = parameter)
    print(paste("Objective is: ", results$objective))
    return(results$objective)
  },
  method = "L-BFGS-B",
  lower = c(0.5,0.5), 
  upper = c(1.2,1.2)
)

stopCluster(cl) 

optimal_param_exit_process$par*initial_param_guess_exit_process
optimal_param_exit_process$value # objective < 1e-8
optimal_param_exit_process$message # converged

# solve these 
optimal_param_exit_process_results <- objective_exit_process(param_EE = optimal_param_exit_process$par*initial_param_guess_exit_process)

# look at interquartile range of exit probabilities among producing NC 
optimal_param_exit_process_results$results_VF$exit_proba[which.min(abs(quantile(z_star_NC_baseline, probs=0.25) - z_star_grid_EE)),1] # 1st quartile TFPQ NC
optimal_param_exit_process_results$results_VF$exit_proba[which.min(abs(quantile(z_star_NC_baseline, probs=0.75) - z_star_grid_EE)),1] # 3rd quartile TFPQ NC

## Add these to main paper
fixed_cost_param <- tibble(
  level_fcost = (optimal_param_exit_process$par*initial_param_guess_exit_process)[[1]],
  scale_fcost = (optimal_param_exit_process$par*initial_param_guess_exit_process)[[2]],
)
# write_csv(x = fixed_cost_param, file = "Computation/data/fixed_cost_param.csv") 

#### Step 6: Find entry cost in line with zero profit condition ####

### to get unconditional expected value function, need unconditional distrib over z*

# draw from unconditional distribution 
z_star_uncond_distrib_EE <- exp(rnorm(n = 100000, 
                                      mean = optimal_param_prod_process$par[[2]]/(1-optimal_param_prod_process$par[[1]]), 
                                      sd = sqrt(optimal_param_prod_process$par[[3]]/(1-optimal_param_prod_process$par[[1]]^2))
))

# put this distribution on the grid 
closest_grid_points_z_star_uncond_distrib_EE <- sapply(X = z_star_uncond_distrib_EE, FUN = function(x){which.min(abs(x - z_star_grid_EE))})
z_star_uncond_distrib_EE_grid <- tibble(z_star_grid_point = closest_grid_points_z_star_uncond_distrib_EE) %>% 
  group_by(z_star_grid_point) %>% 
  summarise(share = n()/length(closest_grid_points_z_star_uncond_distrib_EE))
# as long as all points given, then can proceed with this (otherwise, make sure all grid points given)
# write_csv(x = z_star_uncond_distrib_EE_grid, file = "Computation/data/z_star_uncond_distrib.csv")

# compute expected entry value 
expected_entry_value <- sum(optimal_param_exit_process_results$results_VF$VF[,1]*z_star_uncond_distrib_EE_grid$share)

# How big is this? 
expected_entry_value/mean(firms_1997$routs) # about 14% of avg firm revenue 
expected_entry_value/9040 # in 000' USD: about 1.1mio USD (fairly large?) 

################ Baseline GE (to back out aggregate labor supply) ################

#### Find stationary distribution over z* #### 

# function to compute baseline stationary distribution 
find_baseline_stationary_distrib <- function(initial_distrib, exit_proba, crit = 1e-16, max_iter = 1000, verbose = F){
  
  # initialize objects 
  diff <- Inf 
  iter <- 1
  prev_distrib <- initial_distrib
  entry_distrib <- initial_distrib # assumes that initial distrib is unconditional one! 
  
  # start loop of iterating on distribution 
  while (diff > crit & iter < max_iter) {
    
    if(verbose){
      print(paste("Iteration",iter,"with difference:",diff))
    }
    
    # Step 1: Compute survivors from last period 
    survivor_distrib <- prev_distrib*(1-exit_proba)
    mass_exit <- sum(prev_distrib*exit_proba) # will add this back later! 
    
    # Step 2: Update their productivity 
    survivor_distrib_new <- (survivor_distrib %*% transition_z)[1,]
    
    # Step 3: Add new entrants (come from unconditional distribution)
    entrant_distrib <- entry_distrib*mass_exit # ensure that entry and exit always balance 
    
    # Step 4: put all together 
    new_distrib <- survivor_distrib_new + entrant_distrib
    
    # Step 5: update
    diff <- max(abs(new_distrib - prev_distrib))
    iter <- iter + 1 
    prev_distrib <- new_distrib
    
  }
  
  # return SS distribution 
  return(new_distrib)
}

# find stationary distribution 
baseline_SS_distribution <- find_baseline_stationary_distrib(
  initial_distrib = z_star_uncond_distrib_EE_grid$share, 
  exit_proba = optimal_param_exit_process_results$results_VF$exit_proba[,1], 
  verbose = T) 
# write_csv(x = tibble(share = baseline_SS_distribution), file = "Computation/data/baseline_SS_distribution.csv")

## Validation: Check TFPQ distribution of NC in data vs. SS in model 
tibble(z_star = z_star_grid_EE, 
       Model = baseline_SS_distribution, 
       Data = empirical_distrib_z_star_NC_EE$share) %>% 
  pivot_longer(cols = c(Model,Data), names_to = "Type", values_to = "Density") %>%
  filter(z_star <= 35 & z_star >= 5) %>% 
  ggplot(aes(x = z_star, y = Type, height = Density, group = Type, fill = Type)) + 
  geom_density_ridges(stat = "identity", scale = 2.5)
## Fit is not great!! Data features much less low-productive firms; potentially in line with more selection on entry, which is not modelled here. 

#### Use stationary distribution over z* to compute all aggregates #### 

optimal_param_exit_process_results$results_VF$exit_proba[,1]

compute_baseline_aggregates <- function(SS_distrib,exit_proba){
  
  # Get total (productive) inputs 
  productive_capital <- sum((1-vat_tax)*profits_grid_EE$revenue_expected*alpha_gross*SS_distrib/R)
  productive_labor <- sum((1-vat_tax)*profits_grid_EE$revenue_expected*beta_gross*SS_distrib)
  productive_intermediates <- sum(profits_grid_EE$revenue_expected*gamma_gross*SS_distrib)
  
  # Get total output (need to construct revenue output in data!) 
  total_output <- sum(profits_grid_EE$revenue_output_expected*SS_distrib)
  
  # Get total entry costs 
  mass_exit <- sum(SS_distrib*exit_proba) # which has to equal mass of entrants in SS 
  total_entry_costs <- expected_entry_value*mass_exit
  
  # Can construct total labor demand (which equals total labor supply in baseline equilibrium)
  total_labor_demand <- total_entry_costs + productive_labor
  
  # Get total rent-seeking activities 
  total_rent_seeking <- baseline_parameters[["proba_c"]]*sum(profits_grid_EE$rent_seeking_C_expected*SS_distrib)
  
  # total new investments (in SS)
  total_investments <- delta*productive_capital
  
  # total consumption (enforce aggregate resource constraint -- only true if closed economy or BOP = 0) 
  total_consumption <- total_output - total_investments - productive_intermediates - total_rent_seeking
  
  ## Could add HH budget constraint as consistency check for constructing consumption!! 
  
  # total tax revenues & subsidies 
  cit_revenue = sum(profits_grid_EE$profits_expected*SS_distrib)*(profit_tax/(1-profit_tax))
  vat_revenue = sum(vat_tax*((1-gamma_gross)*profits_grid_EE$revenue_expected - baseline_parameters[["proba_c"]]*profits_grid_EE$rent_seeking_C_expected)*SS_distrib)
  total_tax_revenues <- cit_revenue + vat_revenue
  total_subsidies <- sum(SS_distrib*profits_grid_EE$subsidies_expected)
  total_govt_budget <- total_tax_revenues - total_subsidies
  
  # Also compute total paid fixed costs 
  total_fixed_costs <- sum(SS_distrib*(1-exit_proba)*optimal_param_exit_process_results$results_VF$exp_fcost[,1])
  
  return(list(
    "productive_capital" = productive_capital,
    "productive_labor" = productive_labor,
    "productive_intermediates" = productive_intermediates,
    "total_output" = total_output,
    "total_entry_costs" = total_entry_costs,
    "total_labor_demand" = total_labor_demand,
    "total_rent_seeking" = total_rent_seeking,
    "total_investments" = total_investments,
    "total_consumption" = total_consumption,
    "total_tax_revenues" = total_tax_revenues,
    "total_subsidies" = total_subsidies, 
    "total_govt_budget" = total_govt_budget,
    "total_fixed_costs" = total_fixed_costs))
}

results_baseline_aggregates <- compute_baseline_aggregates(SS_distrib = baseline_SS_distribution, 
                                                           exit_proba = optimal_param_exit_process_results$results_VF$exit_proba[,1])

# About 0.16% of total resources on rent-seeking 
results_baseline_aggregates$total_rent_seeking/results_baseline_aggregates$total_output

# about 36.5% of output is consumed by HHs 
results_baseline_aggregates$total_consumption/results_baseline_aggregates$total_output

# taxes & subsidies: about 35% of corporate tax revenues are spend on subsidizing connected firms 
results_baseline_aggregates$total_subsidies/results_baseline_aggregates$total_tax_revenues

# Or alternatively: Tax revenue from corporate taxation could be 54% higher if (implicit) subsidies to connected firms were shut down
results_baseline_aggregates$total_tax_revenues/(results_baseline_aggregates$total_tax_revenues - results_baseline_aggregates$total_subsidies)

# Fixed costs: Net negative!! 
results_baseline_aggregates$total_fixed_costs/results_baseline_aggregates$total_output

# export 
# write_csv(x = as_tibble(results_baseline_aggregates), file = "Computation/data/baseline_aggregates.csv")


#################################
###### Validation of model ###### 
#################################

#### Data preparation #### 

# get main data 
model_results_df <- results_optimal_DRS_eps_cost$results_df %>% 
  mutate(
    intermediate_share = (optimal_m_R + gamma_gross*optimal_revenue)/optimal_revenue, 
    diff_intermediate_share = optimal_m_R/optimal_revenue
  ) 
mean(model_results_df$diff_intermediate_share) # 4pp

# also get DRS data 
model_DRS_results_df <- results_optimal_DRS_eps$results_df %>% 
  mutate(
    intermediate_share = (optimal_m_R + gamma_gross*optimal_revenue)/optimal_revenue, 
    diff_intermediate_share = optimal_m_R/optimal_revenue
  ) 
mean(model_DRS_results_df$diff_intermediate_share) # 4.65pp 

#### Step 1 (partly in Appendix): Distribution of rent-seeking and subsidies in model #### 

## Distribution plot: Rent-seeking (left)
plot_m_R_distrib <- model_results_df %>% 
  ggplot() + geom_point(aes(x = log(TFPQ_model), y = optimal_m_R/optimal_revenue), alpha = 0.1, size = 0.1) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "Rent-seeking cost share") + 
  xlab(label = "TFPQ (log)") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10")

## Distribution plot: Subsidies (left)
plot_subsidy_distrib <- model_results_df %>% 
  ggplot() + geom_point(aes(x = log(TFPQ_model), y = optimal_subsidy), alpha = 0.1, size = 0.1) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "Subsidy rate") + 
  xlab(label = "TFPQ (log)") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10")

ggarrange(
  plot_m_R_distrib, #  + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))) 
  plot_subsidy_distrib, #+ theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))),
  ncol = 2, nrow = 1)  
ggsave(width = 12, height = 5, dpi = 200,
       "R/output/FigureA_baseline_m_R_and_subsidy_variation.png")

#### Step 2 Indirect evidence: differential intermediate input shares #### 

### Level differences ###

# median & mean (after residualizing): Connected firms have 5-8 pp. higher intermediate expenditure share 
firms_1997 %>% group_by(connected) %>% 
  summarise(med_intermediate_share = median(intermediate_share_resid),
            avg_intermediate_share = mean(intermediate_share_resid))

## Regression results (as reported in paper): require full MICRO DATA, not shown here

### Variance differences ###

### Compute differential variance in intermediate share 

# What about dispersion in intermediate input shares? Connected indeed slightly higher!
diff_var_interm_share <- firms_1997 %>% group_by(connected) %>% 
  summarise(var_intermediate_share = var(intermediate_share_resid))
diff_var_interm_share$var_intermediate_share[1] - diff_var_interm_share$var_intermediate_share[2]

## Bootstrap 95% confidence band for differential variance of intermediate share 
compute_bs_var_interm_share <- function(data_connect,data_nonconnect, bs_draws = 10000){
  
  ## Draw with replacement from intermediate input share distribution
  
  n_draws_C <- nrow(data_connect)
  n_draws_NC <- nrow(data_nonconnect)
  
  # set seeds
  set.seed(123456)
  bootstrap_seeds = rnorm(n = bs_draws, mean = 1000, sd = 100)
  
  # Get sampling function 
  take_sample <- Vectorize(function(data,seed,n_draws){
    set.seed(seed)
    return(sample(x = data, size = n_draws, replace = TRUE))
  }, vectorize.args = c("seed"))
  
  # bootstrap samples
  bs_share_C <- take_sample(
    data = data_connect$intermediate_share_resid, 
    seed = bootstrap_seeds, n_draws = n_draws_C)
  
  bs_share_NC <- take_sample(
    data = data_nonconnect$intermediate_share_resid, 
    seed = bootstrap_seeds, n_draws = n_draws_NC)
  
  ## Compute variances 
  
  variances_C <- apply(bs_share_C, 2, var)
  variances_NC <- apply(bs_share_NC, 2, var)
  
  # compute differential variances 
  diff_variances <- variances_C - variances_NC
  
  # return results
  return(diff_variances)
}

diff_var_interm_share_bs <- compute_bs_var_interm_share(
  data_connect = firms_1997 %>% filter(connected == "connected"),
  data_nonconnect = firms_1997 %>% filter(connected == "non-connected"),
  bs_draws = 10000)

# get 95% CI: ranges from -0.003 to 0.015
quantile(x = diff_var_interm_share_bs, probs = c(0.025,0.975))

# Model: Variance in intermediate share very similar between baseline & DRS model 
var(model_results_df$intermediate_share)
var(model_DRS_results_df$intermediate_share)

#### Step 2 Indirect evidence (Appendix 1): Composition of differential interm input shares (NOT HERE, REQUIRES FULL MICRO DATA) #### 
#### Step 2 Indirect evidence (Appendix 1): Distribution of R&D expenditure #### 

# Look at entire relative distribution in the data 

# R&D distribution 
plot_RandD_distrib <- firms_1997 %>% 
  mutate(
    connections = fct_rev(as.factor(connected)), 
    connected = case_when(
      connected == "connected" ~ "C", 
      connected == "non-connected" ~ "NC")) %>% 
  ggplot(aes(y = connected, group = connected, fill = connected)) + 
  geom_density_ridges(
    aes(x = RandD_share_resid,
        fill = connections),
    alpha = 0.8, color = "black", quantile_lines = TRUE, quantiles = 2, 
    show.legend = TRUE, bandwidth = 0.0001) + 
  labs(
    x = "R&D share",
    y = ""
  ) + 
  xlim(c(-0.001,0.005)) + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  scale_fill_discrete(name = "Type:") + 
  theme(legend.position = "bottom")

# output results
ggsave(width = 6, height = 4, "R/output/relative_RandD_distribution.png")


#### Step 2 Indirect evidence (Appendix 2): Differences across blood vs. normal connected firms (NOT HERE, REQUIRES FULL MICRO DATA) #### 
#### Step 3: Direct evidence from Odebrecht case #### 

## Compare to Odebrecht case: Look at top 25% connected firms (in terms of size/TFPQ)

# Full distribution (share goes to zero for largest firms) 
model_results_df %>% 
  ggplot() + geom_point(aes(x = TFPQ_model, y = diff_intermediate_share))

# Look at top 25%: Avg & median are 1.56-1.57% (Reported: Avg of 0.016)
summary((model_results_df %>% 
           # order by TFPQ
           arrange(TFPQ_model) %>%  
           # drop cutoff*% of observations
           dplyr::slice((round(0.75*n()) + 1):n()))$diff_intermediate_share)

# DRS model: Look at top 25%: Avg & median is 5.1-5.3% (Reported: Avg of 0.051)
summary((model_DRS_results_df %>% 
           # order by TFPQ
           arrange(TFPQ_model) %>%  
           # drop cutoff*% of observations
           dplyr::slice((round(0.75*n()) + 1):n()))$diff_intermediate_share)

### Profit share Odebrecht case vs model ### 

# Overall: Mean is 43.7%  (53.2% in DRS model)
summary(model_results_df$optimal_m_R/model_results_df$optimal_profits_C)
summary(model_DRS_results_df$optimal_m_R/model_DRS_results_df$optimal_profits_C)

# Restrict to top 25%: 13.8% 
model_results_df %>% 
  # order by TFPQ
  arrange(TFPQ_model) %>%  
  # drop cutoff*% of observations
  dplyr::slice((round(0.75*n()) + 1):n()) %>% 
  # look at avg profit share 
  summarise(avg_profit_share = mean(optimal_m_R/optimal_profits_C))

# DRS model: Restrict to top 25%: 62.9% (far off!)
model_DRS_results_df %>% 
  # order by TFPQ
  arrange(TFPQ_model) %>%  
  # drop cutoff*% of observations
  dplyr::slice((round(0.75*n()) + 1):n()) %>% 
  # look at avg profit share 
  summarise(avg_profit_share = mean(optimal_m_R/optimal_profits_C))

### Subsidy estimate: Total costs after renegotiation / initial cost (assume competitive price) 

# Overall: (taken from Total (2001-2016) in Table 2)
134051/106384 # about 26% price markup 

# average across projects (from Table 2: Indiv ratios): 42% 
mean(c(13343/4141, 2134/1828, 5853/4588, 4074/3466, 384/384, 3059/2155, 10391/8839, 17253/14904, 77559/66080))

# in model: 42% (super close!) 
(model_results_df %>% 
    # order by TFPQ
    arrange(TFPQ_model) %>%  
    # drop cutoff*% of observations
    dplyr::slice((round(0.75*n()) + 1):n()) %>% 
    # look at avg profit share 
    summarise(avg_subsidy_rate = mean(1 + optimal_subsidy)))[[1]]

# in DRS model: 53% (less close!) 
(model_DRS_results_df %>% 
    # order by TFPQ
    arrange(TFPQ_model) %>%  
    # drop cutoff*% of observations
    dplyr::slice((round(0.75*n()) + 1):n()) %>% 
    # look at avg profit share 
    summarise(avg_subsidy_rate = mean(1 + optimal_subsidy)))[[1]]

## Full distribution of subsidies: Avg subsidy rate of 44%  
model_results_df %>% 
  # look at avg profit share 
  summarise(avg_subsidy_rate = mean(1 + optimal_subsidy))

##########################################
###### Estimation of model with DRS ######
##########################################

################ DRS results Productivity Process ################

#### Model & solve for optimal parameters ####

# save parameters 
DRS_parameters <- c(
  optimal_DRS_eps$par*initial_parameter_DRS_eps,
  "fixed_cost" = 0.0, 
  "proba_c" = nrow_connected/(nrow_connected+nrow_non_connected), 
  "rho" = ratio_beta_sigma_DRS/sqrt((ratio_beta_sigma_DRS)^2 + 1)
)

# Write objective function for estimating productivity process 
objective_productivity_process_DRS <- function(
    productivity_param,
    parameters = DRS_parameters){
  
  ##### Step 0: Prepare data given parameters (can move this out!) #####
  
  print(productivity_param)
  
  # extract parameters
  rho_z <- productivity_param["rho_z"]
  mean_zeta_star <- productivity_param["mean_zeta_star"]
  var_zeta_star <- productivity_param["var_zeta_star"]
  
  ##### Step 1: Update marginal productivity distribution of survivors given productivity_param #####
  
  # draw new log_z_star (could also have many more rnorm draws!) 
  set.seed(12345)
  log_z_star_new <- rho_z*log_z_star_NC_survivors + rnorm(n = length(log_z_star_NC_survivors), mean = mean_zeta_star, sd = sqrt(var_zeta_star))
  
  ##### Step 2: Draw new epsilon based on new z_star #####
  
  # Draw new epsilon based on new z_star 
  set.seed(12345)
  new_epsilon <- rnorm(n = length(log_z_star_NC_survivors), 
                       mean = parameters["alpha_eps"] + parameters["beta_eps"]*log_z_star_new, 
                       sd = sqrt(parameters["variance_eps_z"])
  )
  
  # Check that epsilon are positive 
  if(sum(new_epsilon < 0) > 0){
    print(paste("Warning: There are ", sum(new_epsilon < 0), " negative epsilon with min ", round(min(new_epsilon),4), ". Setting all negative ones to 1e-16."))
  }
  
  # ensure that epsilon are positive 
  new_epsilon[new_epsilon < 0] <- 1e-16 
  
  ##### Step 3: Solve for optimal policies/subsidies ##### 
  
  results_df <- compute_subsidy_DRS_eps_model_grid(
    z_star_grid = exp(log_z_star_new), 
    epsilon_grid = new_epsilon,
    z_tilde_grid = z_tilde_baseline(z_star = exp(log_z_star_new)), 
    theta = DRS_parameters[["theta"]])
  
  ##### Step 4: Construct objective ##### 
  
  # create regression dataset
  reg_df <- rbind(
    # first dataset only non-connected 
    results_df %>% select(z_star) %>% 
      mutate(weight = 1 - unname(parameters["proba_c"]),
             TFPQ_prev = exp(log_z_star_NC_survivors)) %>%  
      rename(TFPQ = z_star), 
    # second dataset only connected 
    results_df %>% select(TFPQ_model) %>% 
      mutate(weight = unname(parameters["proba_c"]), 
             TFPQ_prev = exp(log_z_star_NC_survivors)) %>%  
      rename(TFPQ = TFPQ_model)
  )
  
  # Now run regression 
  main_regression <- lm(formula = log(TFPQ) ~ log(TFPQ_prev), data = reg_df, weights = reg_df$weight)
  
  # construct objective 
  error_constant <- abs(main_regression$coefficients[[1]] - lm_prod_process$coefficients[[1]])/lm_prod_process$coefficients[[1]]
  error_rho <- abs(main_regression$coefficients[[2]] - lm_prod_process$coefficients[[2]])/lm_prod_process$coefficients[[2]]
  error_var <- abs(matrixStats::weightedVar(x = main_regression$residuals, w = reg_df$weight) - var(lm_prod_process$residuals))/var(lm_prod_process$residuals)
  
  # print(main_regression$coefficients[[1]])
  # print(main_regression$coefficients[[2]])
  # print(var(main_regression$residuals))
  
  # full objective is sum of errors 
  objective <- sum(error_constant^2 + error_rho^2 + error_var^2)
  
  # return 
  return(list(objective = objective, 
              moment1 = main_regression$coefficients[[1]], 
              moment2 = main_regression$coefficients[[2]], 
              moment3 = matrixStats::weightedVar(x = main_regression$residuals, w = reg_df$weight)))
}

DRS_initial_param_guess_prod_process <- c(
  "rho_z" = lm_prod_process$coefficients[[2]], 
  "mean_zeta_star" = lm_prod_process$coefficients[[1]], 
  "var_zeta_star" = var(lm_prod_process$residuals)
) 

# test 
objective_productivity_process_DRS(productivity_param = DRS_initial_param_guess_prod_process)

## Start optim parallel 
cl <- makeCluster(detectCores() - 2, type="FORK")
setDefaultCluster(cl = cl)

# Find optimal parameters (starting from good initial guess & relatively tight bounds!)
DRS_optimal_param_prod_process <- optimParallel( 
  par = DRS_initial_param_guess_prod_process, 
  fn = function(par){
    result <- objective_productivity_process_DRS(productivity_param = par)
    print(paste("Objective is: ", result$objective))
    return(result$objective)
  },
  method = "L-BFGS-B",
  lower = c(0.9,0.06,0.01), 
  upper = c(1.0,0.15,0.03)
)

stopCluster(cl) 

# converged! 
DRS_optimal_param_prod_process$par
DRS_optimal_param_prod_process$value # obj < 1e-9
DRS_optimal_param_prod_process$message # converged 

################ Baseline results Exit/Entry processes ################

#### Step 1: Discretize z and get transition matrix over z ####

# from Tauchen -- could also use Rouwenhorst!
library(Rtauchen)

# get transition matrix 
DRS_transition_z <- Rtauchen(
  ne = 1000, 
  ssigma_eps = sqrt(DRS_optimal_param_prod_process$par[[3]]), 
  llambda_eps = DRS_optimal_param_prod_process$par[[1]], 
  m = 2.5)
# write_csv(x = as_tibble(DRS_transition_z), file = "Computation/data/DRS_transition_matrix_z.csv")

# get corresponding grid (and make sure mean is correct!) 
DRS_mean_uncond_log_z_star <- (DRS_optimal_param_prod_process$par[[2]]/(1-DRS_optimal_param_prod_process$par[[1]]))
DRS_log_z_star_grid_EE <- as.vector(Tgrid(
  ne = 1000, 
  ssigma_eps = sqrt(DRS_optimal_param_prod_process$par[[3]]), 
  llambda_eps = DRS_optimal_param_prod_process$par[[1]], 
  m = 2.5) + DRS_mean_uncond_log_z_star)

DRS_z_star_grid_EE <- exp(DRS_log_z_star_grid_EE)
# write_csv(x = tibble(z_star = DRS_z_star_grid_EE), file = "Computation/data/DRS_z_star_grid.csv")

# test that this gives correct numbers: looks good! 

DRS_test_prod_process <- tibble(log_z_star = DRS_log_z_star_grid_EE,
                                log_z_star_next = NA)
for(row in c(1:nrow(DRS_test_prod_process))){
  DRS_test_prod_process$log_z_star_next[row] <- sample(x = DRS_log_z_star_grid_EE, size = 1, replace = T, prob = DRS_transition_z[row,])
}

DRS_test_lm_prod_process <- lm(data = DRS_test_prod_process, formula = log_z_star_next ~ log_z_star)
summary(DRS_test_lm_prod_process) # coefficients in line with mean & persistence 
sd(DRS_test_lm_prod_process$residuals) # sd of residual also in line
rm(DRS_test_lm_prod_process)
rm(DRS_test_prod_process)

#### Step 2: For each z, solve for optimal profits including for grid over epsilon ####

# create array to save (expected) profits conditional on z (both for NC & C)
DRS_profits_grid_EE <- tibble(
  z_star = DRS_z_star_grid_EE,
  z_tilde = z_tilde_baseline(z_star = DRS_z_star_grid_EE), 
  profits_C_expected = rep(NA, length(DRS_z_star_grid_EE)),
  profits_NC = rep(NA, length(DRS_z_star_grid_EE))
)

DRS_get_expected_profits_C_EE <- function(row, n_eps){ 
  
  ### Substep 1: Draw epsilon conditional on z_star & the probability 
  set.seed(12345)
  epsilon <- rnorm(
    n = n_eps, 
    mean = DRS_parameters[["alpha_eps"]] + DRS_parameters[["beta_eps"]]*DRS_log_z_star_grid_EE[row], 
    sd = sqrt(DRS_parameters[["variance_eps_z"]])
  )
  
  # ensure that epsilon is always positive
  epsilon[epsilon < 0] <- 1e-16 
  
  # get probability/density 
  proba_epsilon <- dnorm(x = epsilon, 
                         mean = DRS_parameters[["alpha_eps"]] + DRS_parameters[["beta_eps"]]*DRS_log_z_star_grid_EE[row], 
                         sd = sqrt(DRS_parameters[["variance_eps_z"]]))
  
  # normalize probability to make sure it sums to 1  
  proba_epsilon <- proba_epsilon/sum(proba_epsilon)
  
  ### Substep 2: Get optimal profits 
  
  results_df <- compute_subsidy_DRS_eps_model_grid(
    z_star_grid = rep(DRS_z_star_grid_EE[row], n_eps), 
    epsilon_grid = epsilon, 
    z_tilde_grid = rep(DRS_profits_grid_EE$z_tilde[row], n_eps),
    theta = DRS_parameters[["theta"]])
  
  # get expected profits by taking weighted mean across all possible epsilon 
  expected_profits_C <- sum(results_df$optimal_profits_C*proba_epsilon)
  expected_profits_NC <- sum(results_df$optimal_profits_NC*proba_epsilon)
  
  # collect further objects needed for later 
  expected_m_R_C <- sum(results_df$optimal_m_R*proba_epsilon)
  expected_subsidies_C <- sum((results_df$optimal_revenue)*(results_df$optimal_subsidy/(1 + results_df$optimal_subsidy))*proba_epsilon)
  # add VAT revenue here 
  # add profit tax revenue here 
  expected_revenue_output_C <- sum(results_df$revenue_output_C*proba_epsilon)
  expected_revenue_output_NC <- sum(results_df$revenue_output_NC*proba_epsilon)
  expected_subsidy_rate_C <- sum(results_df$optimal_subsidy*proba_epsilon)
  expected_revenue_C <- sum(results_df$optimal_revenue*proba_epsilon)
  expected_revenue_NC <- sum(results_df$z_tilde*proba_epsilon)
  
  return(c("expected_profits_C" = expected_profits_C, 
           "expected_profits_NC" = expected_profits_NC, 
           "expected_m_R_C"= expected_m_R_C,
           "expected_subsidies_C" = expected_subsidies_C,
           "expected_revenue_output_C" = expected_revenue_output_C, 
           "expected_revenue_output_NC" = expected_revenue_output_NC, 
           "expected_subsidy_rate_C" = expected_subsidy_rate_C,
           "expected_revenue_C" = expected_revenue_C, 
           "expected_revenue_NC" = expected_revenue_NC))
}

# compute profits (takes a moment!) & other optimal choices 
DRS_profits_matrix_EE <- sapply(X = c(1:length(DRS_z_star_grid_EE)), FUN = function(x){DRS_get_expected_profits_C_EE(row = x, n_eps = 100)})
DRS_profits_grid_EE$profits_C_expected <- DRS_profits_matrix_EE[1,]
DRS_profits_grid_EE$profits_NC <- DRS_profits_matrix_EE[2,]
DRS_profits_grid_EE$profits_expected <- DRS_parameters[["proba_c"]]*DRS_profits_grid_EE$profits_C_expected + (1-DRS_parameters[["proba_c"]])*DRS_profits_grid_EE$profits_NC
DRS_profits_grid_EE$rent_seeking_C_expected <- DRS_profits_matrix_EE[3,]
DRS_profits_grid_EE$subsidies_expected <- DRS_parameters[["proba_c"]]*DRS_profits_matrix_EE[4,]
DRS_profits_grid_EE$revenue_output_expected <- DRS_parameters[["proba_c"]]*DRS_profits_matrix_EE[5,] + (1-DRS_parameters[["proba_c"]])*DRS_profits_matrix_EE[6,]
DRS_profits_grid_EE$revenue_C_expected <- DRS_profits_matrix_EE[8,]
DRS_profits_grid_EE$revenue_NC <- DRS_profits_matrix_EE[9,]
DRS_profits_grid_EE$revenue_expected <- DRS_parameters[["proba_c"]]*DRS_profits_grid_EE$revenue_C_expected + (1-DRS_parameters[["proba_c"]])*DRS_profits_grid_EE$revenue_NC
DRS_profits_grid_EE$subsidy_rate_C_expected <- DRS_profits_matrix_EE[7,]

# can export this 
# write_csv(x = DRS_profits_grid_EE, file = "Computation/data/DRS_profits_grid_baseline.csv")

#### Step 3: VFI: Solve expected VF over z conditional on guess for Gumbel parameters ####

# VFI function 
DRS_VFI_EE <- function(guess_exp_VF,param_EE,crit = 1e-6,verbose=T){
  
  ### Step 0: Prepare objects 
  
  # get parameters 
  level_fcost <- param_EE[["level_fcost"]]
  scale_fcost <- param_EE[["scale_fcost"]]
  
  # initialize objects 
  exp_VF_new <- guess_exp_VF
  exp_profits <- DRS_profits_grid_EE$profits_expected
  
  # create function to compute expected fixed costs 
  compute_exp_fcost <- function(exit_proba,exp_VF){
    
    # make sure that gamma incomplete is robust regarding "too small" values 
    gamma_incomplete <- case_when(
      # exit_proba too close to 1 
      -log(exit_proba) < 1e-250 ~ expint::gammainc(a = 0,x = 1e-250),
      # exit_proba too close to 0 
      exit_proba < 1e-250 ~ 0.0, 
      .default = expint::gammainc(a = 0,x = -log(exit_proba))
    )
    
    # compute expected fixed cost 
    exp_fcost <- beta*exp_VF*exit_proba - scale_fcost*gamma_incomplete
    
    return(exp_fcost)
  }
  
  ### Step 1: While loop
  iteration <- 1 
  VF_diff <- Inf 
  
  while (VF_diff > crit) {
    
    # print progress
    if(verbose){
      print(paste("Iteration",iteration,"with difference:",VF_diff))
    }
    
    # get exit probability given new guess 
    exit_proba <- 1 - exp(-exp(-(beta*exp_VF_new - level_fcost)/scale_fcost))
    exp_fcost <- compute_exp_fcost(exit_proba = exit_proba, exp_VF = exp_VF_new)
    
    # compute new VF (over all z)
    VF_new <- exp_profits + (1-exit_proba)*( -exp_fcost + beta*exp_VF_new )
    
    # save old results 
    exp_VF_old <- exp_VF_new
    
    # compute new expected VF 
    exp_VF_new <- DRS_transition_z %*% VF_new
    
    # update iteration & compute difference 
    iteration <- iteration + 1 
    VF_diff <- max(abs(exp_VF_old - exp_VF_new))
    
  }
  
  # update results
  exit_proba <- 1 - exp(-exp(-(beta*exp_VF_new - level_fcost)/scale_fcost))
  exp_fcost <- compute_exp_fcost(exit_proba = exit_proba, exp_VF = exp_VF_new)
  VF <- exp_profits + (1-exit_proba)*( -exp_fcost + beta*exp_VF_new )
  
  # Return results 
  return(list(exp_VF = exp_VF_new, exit_proba = exit_proba, VF = VF, exp_fcost = exp_fcost))
}

# Test it 
DRS_test_VFI_EE <- DRS_VFI_EE(
  guess_exp_VF = (1/(1-beta))*DRS_profits_grid_EE$profits_NC, 
  param_EE = c("level_fcost" = 2500000, "scale_fcost" = 10000))

summary(DRS_test_VFI_EE$exit_proba)

#### Step 4: Construct objective function ####

## Find distribution of NC in 1997 over z_grid 
DRS_closest_grid_points_z_star_EE <- sapply(X = z_star_NC_baseline, FUN = function(x){which.min(abs(x - DRS_z_star_grid_EE))})
DRS_empirical_distrib_z_star_NC_EE <- tibble(z_star_grid_point = DRS_closest_grid_points_z_star_EE) %>% 
  group_by(z_star_grid_point) %>% 
  summarise(share = n()/length(DRS_closest_grid_points_z_star_EE))
DRS_empirical_distrib_z_star_NC_EE <- left_join(
  x = tibble(z_star_grid_point = c(1:length(DRS_z_star_grid_EE)), 
             z_star = DRS_z_star_grid_EE), 
  y = DRS_empirical_distrib_z_star_NC_EE) 
DRS_empirical_distrib_z_star_NC_EE$share <- ifelse(is.na(DRS_empirical_distrib_z_star_NC_EE$share), 0.0, DRS_empirical_distrib_z_star_NC_EE$share)

## Next: Construct objective 
DRS_objective_exit_process <- function(param_EE){
  
  print(param_EE)
  
  # Step 1: Solve for VF given param_EE 
  results_VF <- DRS_VFI_EE(
    guess_exp_VF = (1/(1-beta))*DRS_profits_grid_EE$profits_NC, 
    param_EE = param_EE, verbose = F) 
  
  # Step 2: Run regression of exit proba on log(z_star)
  model_data <- DRS_empirical_distrib_z_star_NC_EE 
  model_data$exit_proba <- results_VF$exit_proba[,1]
  reg_model_firms_NC_exit <- lm(
    data = model_data,
    formula = exit_proba ~ log(z_star), 
    weights = share)
  
  # Step 3: Construct objective 
  moment_coef1 <- (reg_model_firms_NC_exit$coefficients[[1]] - reg_firms_NC_exit_resid$coefficients[[1]])/reg_firms_NC_exit_resid$coefficients[[1]]
  moment_coef2 <- (reg_model_firms_NC_exit$coefficients[[2]] - reg_firms_NC_exit_resid$coefficients[[2]])/reg_firms_NC_exit_resid$coefficients[[2]]
  
  print(paste("Constant is:",reg_model_firms_NC_exit$coefficients[[1]], 
              ". Slope is:", reg_model_firms_NC_exit$coefficients[[2]]))
  
  objective <- moment_coef1^2 + moment_coef2^2 
  return(list(objective = objective, results_VF = results_VF))
}

#### Step 5: Find optimal exit parameters ####

# start with good initial guess 
DRS_initial_param_guess_exit_process <- c("level_fcost" = -52300000*1.5, "scale_fcost" = 2.5e7*1.5)
DRS_objective_exit_process(param_EE = DRS_initial_param_guess_exit_process)

## Start optim parallel 
cl <- makeCluster(detectCores() - 2, type="FORK")
setDefaultCluster(cl = cl)

# Find optimal parameters (starting from good initial guess & relatively tight bounds!)
DRS_optimal_param_exit_process <- optimParallel( 
  par = DRS_initial_param_guess_exit_process/DRS_initial_param_guess_exit_process, 
  fn = function(par){
    parameter <- par*DRS_initial_param_guess_exit_process
    results <- DRS_objective_exit_process(param_EE = parameter)
    print(paste("Objective is: ", results$objective))
    return(results$objective)
  },
  method = "L-BFGS-B",
  lower = c(0.5,0.5), 
  upper = c(1.2,1.2)
)

stopCluster(cl) 

DRS_optimal_param_exit_process$par*DRS_initial_param_guess_exit_process # interior solution
DRS_optimal_param_exit_process$value # close to zero
DRS_optimal_param_exit_process$message # converged! 

# solve these 
DRS_optimal_param_exit_process_results <- DRS_objective_exit_process(param_EE = DRS_optimal_param_exit_process$par*DRS_initial_param_guess_exit_process)

# look at interquartile range of exit probabilities among producing NC (same as baseline!)
DRS_optimal_param_exit_process_results$results_VF$exit_proba[which.min(abs(quantile(z_star_NC_baseline, probs=0.25) - DRS_z_star_grid_EE)),1] # 1st quartile TFPQ NC
DRS_optimal_param_exit_process_results$results_VF$exit_proba[which.min(abs(quantile(z_star_NC_baseline, probs=0.75) - DRS_z_star_grid_EE)),1] # 3rd quartile TFPQ NC

## Add these to main paper
DRS_fixed_cost_param <- tibble(
  level_fcost = (DRS_optimal_param_exit_process$par*DRS_initial_param_guess_exit_process)[[1]],
  scale_fcost = (DRS_optimal_param_exit_process$par*DRS_initial_param_guess_exit_process)[[2]],
)
# write_csv(x = DRS_fixed_cost_param, file = "Computation/data/DRS_fixed_cost_param.csv") 

#### Step 6: Find entry cost in line with zero profit condition ####

### to get unconditional expected value function, need unconditional distrib over z*

# draw from unconditional distribution 
DRS_z_star_uncond_distrib_EE <- exp(rnorm(n = 100000, 
                                          mean = DRS_optimal_param_prod_process$par[[2]]/(1-DRS_optimal_param_prod_process$par[[1]]), 
                                          sd = sqrt(DRS_optimal_param_prod_process$par[[3]]/(1-DRS_optimal_param_prod_process$par[[1]]^2))
))

# put this distribution on the grid 
DRS_closest_grid_points_z_star_uncond_distrib_EE <- sapply(X = DRS_z_star_uncond_distrib_EE, FUN = function(x){which.min(abs(x - DRS_z_star_grid_EE))})
DRS_z_star_uncond_distrib_EE_grid <- tibble(z_star_grid_point = DRS_closest_grid_points_z_star_uncond_distrib_EE) %>% 
  group_by(z_star_grid_point) %>% 
  summarise(share = n()/length(DRS_closest_grid_points_z_star_uncond_distrib_EE))
# as long as all points given, then can proceed with this (otherwise, make sure all grid points given)
# write_csv(x = DRS_z_star_uncond_distrib_EE_grid, file = "Computation/data/DRS_z_star_uncond_distrib.csv")

# compute expected entry value 
DRS_expected_entry_value <- sum(DRS_optimal_param_exit_process_results$results_VF$VF[,1]*DRS_z_star_uncond_distrib_EE_grid$share)

# How big is this? Slightly smaller than baseline estimate 
DRS_expected_entry_value/mean(firms_1997$routs) # about 12% of avg firm revenue 
DRS_expected_entry_value/9040 # in 000' USD: about 980k USD (fairly large?) 

#### Export all parameters #### 

# save as dataframe (to read in Julia)
DRS_parameters_df <- tibble(
  "alpha_eps" = (optimal_DRS_eps$par*initial_parameter_DRS_eps)[[1]],
  "beta_eps" = (optimal_DRS_eps$par*initial_parameter_DRS_eps)[[2]],
  "variance_eps_z" = (optimal_DRS_eps$par*initial_parameter_DRS_eps)[[3]],
  "cost_level" = 0.0,
  "fixed_cost" = 0.0, 
  "theta" = (optimal_DRS_eps$par*initial_parameter_DRS_eps)[[4]], 
  "cost_elasticity" = 0.0, 
  "proba_c" = nrow_connected/(nrow_connected+nrow_non_connected), 
  "rho" = unname(ratio_beta_sigma_DRS/sqrt((ratio_beta_sigma_DRS)^2 + 1)),
  "level_fcost" = (DRS_optimal_param_exit_process$par*DRS_initial_param_guess_exit_process)[[1]],
  "scale_fcost" = (DRS_optimal_param_exit_process$par*DRS_initial_param_guess_exit_process)[[2]],
  "rho_z" = DRS_optimal_param_prod_process$par[[1]],
  "mean_zeta_star" = DRS_optimal_param_prod_process$par[[2]],
  "var_zeta_star" = DRS_optimal_param_prod_process$par[[3]]
)
# write_csv(x = DRS_parameters_df, file = "Computation/data/DRS_parameters.csv")


###########################################
###### Section 5.1 Extension: Wedges ######
###########################################

#### Step 1: measure (residualized) wedges ####

# Overview of residualized labor and capital shares 
summary(firms_1997$labor_share_resid) 
summary(firms_1997$capital_share_resid)

# make sure that residualized input cost shares are reasonable by winsorizing ends!  
firms_1997 <- firms_1997 %>% 
  mutate(
    capital_share_resid = case_when(
      capital_share_resid < quantile(capital_share_resid, probs = 0.1) ~ unname(quantile(capital_share_resid, probs = 0.1)),
      capital_share_resid > quantile(capital_share_resid, probs = 0.9) ~ unname(quantile(capital_share_resid, probs = 0.9)),
      .default = capital_share_resid),
    labor_share_resid = case_when(
      labor_share_resid < quantile(labor_share_resid, probs = 0.1, na.rm = T) ~ unname(quantile(labor_share_resid, probs = 0.1, na.rm = T)),
      labor_share_resid > quantile(labor_share_resid, probs = 0.9, na.rm = T) ~ unname(quantile(labor_share_resid, probs = 0.9, na.rm = T)),
      .default = labor_share_resid)
  )

summary(firms_1997$labor_share_resid) 
summary(firms_1997$capital_share_resid)

## Compute wedges 
firms_1997$capital_wedge = alpha_gross*(1-vat_tax)*(firms_1997$capital_share_resid^(-1)) - 1 
firms_1997$labor_wedge = beta_gross*(1-vat_tax)*(firms_1997$labor_share_resid^(-1)) - 1 

## Look at distribution of labor wedges, also for connected vs non-connected firms 

# interquartile range from 40% subsidy to 30% tax rate 
summary(firms_1997$labor_wedge)

# interquartile range from 
firms_1997 %>% filter(!is.na(labor_wedge)) %>% 
  group_by(connected) %>% 
  summarise(iqr_labor_wedge = quantile(x = labor_wedge, probs = c(0.25,0.75)))

# Find that connected firms are differentially distorted (face higher implicit cost of labor!) 
firms_1997 %>% filter(!is.na(labor_wedge)) %>%
  group_by(connected) %>% 
  summarise(med_labor_wedge = median(labor_wedge),
            avg_labor_wedge = mean(labor_wedge))
# difference is large: about 15 percentage points! 

# in terms of labor share this means median/mean difference of roughly 2.2-3.6 percentage points  
firms_1997 %>% filter(!is.na(labor_share_resid)) %>%
  group_by(connected) %>% 
  summarise(med_labor_share_resid = median(labor_share_resid),
            avg_labor_share_resid = mean(labor_share_resid))

# check 5th and 95th percentiles for connected and non-connected firms
quantile(firms_1997$labor_wedge[firms_1997$connected == "non-connected"], probs = c(0.05,0.95), na.rm = T)
quantile(firms_1997$labor_wedge[firms_1997$connected == "connected"], probs = c(0.05,0.95), na.rm = T)


## Look at distribution of capital wedges, also for connected vs non-connected firms 

# interquartile range from 55% subsidy to 75% tax rate 
firms_1997 %>% filter(!is.na(capital_wedge)) %>% 
  group_by(connected) %>% 
  summarise(iqr_capital_wedge = quantile(x = capital_wedge, probs = c(0.25,0.75)))

# Find that connected firms are differentially distorted (face higher implicit cost of capital!) 
firms_1997 %>% filter(!is.na(capital_wedge)) %>%
  group_by(connected) %>% 
  summarise(med_capital_wedge = median(capital_wedge),
            avg_capital_wedge = mean(capital_wedge))
# difference is even larger: more than 20 percentage points! 

# in terms of capital share this means median/mean difference of roughly 2.6-3.7 percentage points  
firms_1997 %>% filter(!is.na(capital_share_resid)) %>%
  group_by(connected) %>% 
  summarise(med_capital_share_resid = median(capital_share_resid),
            avg_capital_share_resid = mean(capital_share_resid))

# check 5th and 95th percentiles for connected and non-connected firms
quantile(firms_1997$capital_wedge[firms_1997$connected == "non-connected"], probs = c(0.05,0.95), na.rm = T)
quantile(firms_1997$capital_wedge[firms_1997$connected == "connected"], probs = c(0.05,0.95), na.rm = T)

### Dispersion in wedges? 

# Labor wedge is less dispersed? About 10% less dispersed
firms_1997 %>% filter(!is.na(labor_wedge)) %>%
  group_by(connected) %>% 
  summarise(var_labor_wedge = var(labor_wedge))

# capital wedge is about 32% more dispersed
firms_1997 %>% filter(!is.na(capital_wedge)) %>%
  group_by(connected) %>% 
  summarise(var_capital_wedge = var(capital_wedge))

#### Step 2: measure (residualized) TFPQ ####

## TFPQ variation (directly import residual TFPQ here to ensure micro data cannot be identified)

# # compute TFPQ with wedges 
# firms_1997 <- firms_1997 %>% 
#   mutate(
#     log_TFPQ_wedges = log(routs) - alpha_gross*log(alpha_gross*(1-vat_tax)*routs/(R*(1+capital_wedge))) - beta_gross*log(beta_gross*(1-vat_tax)*routs/(1+labor_wedge)) - gamma_gross*log(gamma_gross*routs), 
#     TFPQ_wedges = exp(log_TFPQ_wedges))
# 
# # now residualize TFPQ wedges
# OLS_resid_TFPQ_wedges <- fixest::feols(
#   fml = log_TFPQ_wedges ~ 1 | prov + state_owned_major + state_owned_partly + estyear_FE + industry_4digit, #
#   data = firms_1997)

# get residualized TFPQ measure 
firms_1997$TFPQ_wedges_resid <- exp(firms_1997$log_TFPQ_wedges_resid)

#### Plot TFPQ-wedges quantile ratio & relative distributions ####

# TFPQ_wedges distribution 
plot_TFPQ_wedges_distrib <- firms_1997 %>% 
  mutate(
    connections = fct_rev(as.factor(connected)), 
    connected = case_when(
      connected == "connected" ~ "C", 
      connected == "non-connected" ~ "NC")) %>% 
  ggplot(aes(y = connected, group = connected, fill = connected)) + 
  geom_density_ridges(
    aes(x = log_TFPQ_wedges_resid, # divide by 9040 to show in 2010 USD
        fill = connections),
    alpha = 0.8, color = "black", quantile_lines = TRUE, quantiles = 2, 
    show.legend = TRUE, bandwidth = 0.075) + 
  labs(
    x = "log TFPQ",
    y = ""
  ) + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  scale_fill_discrete(name = "Type:") + 
  theme(legend.position = "bottom")

### Right panel: TFPQ_wedges_QR distribution implied by resid TFPQ_wedges 

# Get baseline TFPQ_wedges_QR function
compute_baseline_TFPQ_wedges_QR <- function(data_connect,data_nonconnect, cutoff){
  
  # Arrange from low to high
  data_connect <- data_connect %>%
    arrange(TFPQ_wedges_resid)
  
  ## First get restricted productivity distribution
  
  ### Need to have specified cutoff! ### 
  
  # Now restrict normally 
  restricted_productivity_distribution <- data_nonconnect %>% 
    # remove NAs 
    filter(!is.na(TFPQ_wedges_resid)) %>% 
    # only look at TFPQ_wedges
    select(c(TFPQ_wedges_resid)) %>% 
    # order by TFPQ_wedges
    arrange(TFPQ_wedges_resid) %>% 
    # drop cutoff*% of observations
    dplyr::slice(round(cutoff*n()):n())
  
  restricted_productivity_distribution <- restricted_productivity_distribution$TFPQ_wedges_resid
  
  # Now get quantiles (weighted by rho if supplied)
  z_connected <- unname(quantile(
    x = restricted_productivity_distribution,
    probs = c(1:nrow_connected)/(nrow_connected+1),
  ))
  TFPQ_wedges_QR_connected <- data_connect$TFPQ_wedges_resid/z_connected - 1 
  
  # return results
  return(list(productivity = z_connected, TFPQ_wedges_QR = TFPQ_wedges_QR_connected))
}

# Get bootstrapping TFPQ_wedges_QR function 
compute_bs_baseline_TFPQ_wedges_QR <- function(data_connect,data_nonconnect, cutoff){
  
  # create productivity variable
  data_connect$productivity <- NA
  
  # Arrange from high to low
  data_connect <- data_connect %>%
    arrange(desc(TFPQ_wedges_resid))
  
  ## First get restricted productivity distribution
  
  # Now restrict normally 
  restricted_productivity_distribution <- data_nonconnect %>%
    # remove NAs 
    filter(!is.na(TFPQ_wedges_resid)) %>% 
    # only look at TFPR
    select(c(TFPQ_wedges_resid)) %>% 
    # order by TFPR
    arrange(TFPQ_wedges_resid) %>% 
    # drop cutoff*% of observations
    dplyr::slice(round(cutoff*n()):n())
  
  ## Now, draw with replacement from restricted productivity distribution
  
  n_draws <- nrow(data_connect)
  
  # set seeds
  set.seed(123456)
  n_bootstraps = 10000
  bootstrap_seeds = rnorm(n = n_bootstraps, mean = 1000, sd = 100)
  
  # Get sampling function 
  take_sample <- Vectorize(function(data,seed,n_draws){
    set.seed(seed)
    return(sort(sample(x = data, size = n_draws, replace = TRUE), decreasing = T))
  }, vectorize.args = c("seed"))
  
  # bootstrap samples
  bs_productivities <- take_sample(data = restricted_productivity_distribution$TFPQ_wedges_resid, 
                                   seed = bootstrap_seeds, n_draws = n_draws)
  
  bs_TFPQ_wedges_QR <- data_connect$TFPQ_wedges_resid/bs_productivities - 1
  
  # return results
  return(bs_TFPQ_wedges_QR)
}

# baseline TFPQ_wedges_QR (without fixed cost)
baseline_TFPQ_wedges_QR_noF <- compute_baseline_TFPQ_wedges_QR(
  data_connect = firms_1997[firms_1997$connected == "connected",], 
  data_nonconnect = firms_1997[firms_1997$connected == "non-connected",], 
  cutoff = 0.0)

baseline_TFPQ_wedges_QR_noF_bs <- compute_bs_baseline_TFPQ_wedges_QR(
  data_connect = firms_1997[firms_1997$connected == "connected",], 
  data_nonconnect = firms_1997[firms_1997$connected == "non-connected",], 
  cutoff = 0.0)

# baseline TFPQ_wedges_QR (max fixed cost)
max_cutoff_wedges <- (ecdf(firms_1997$TFPQ_wedges_resid[firms_1997$connected == "non-connected"]))(
  min(firms_1997$TFPQ_wedges_resid[firms_1997$connected == "connected"])
)

baseline_TFPQ_wedges_QR_maxF <- compute_baseline_TFPQ_wedges_QR(
  data_connect = firms_1997[firms_1997$connected == "connected",], 
  data_nonconnect = firms_1997[firms_1997$connected == "non-connected",], 
  cutoff = max_cutoff_wedges)

baseline_TFPQ_wedges_QR_maxF_bs <- compute_bs_baseline_TFPQ_wedges_QR(
  data_connect = firms_1997[firms_1997$connected == "connected",], 
  data_nonconnect = firms_1997[firms_1997$connected == "non-connected",], 
  cutoff = max_cutoff)

## Summarize bootstrapped confidence bands  
CI_baseline_TFPQ_wedges_QR_bs_upper <- tibble(
  productivity_order = c(241:1),
  `2.5%` = t(apply(baseline_TFPQ_wedges_QR_noF_bs, 1, quantile, probs = c(0.05, 0.95),  na.rm = TRUE))[,1],
  `97.5%` = t(apply(baseline_TFPQ_wedges_QR_noF_bs, 1, quantile, probs = c(0.05, 0.95),  na.rm = TRUE))[,2]
) %>% pivot_longer(cols = c(`2.5%`,`97.5%`), names_to = "Percentiles", values_to = "TFPQ_wedges_QR")

CI_baseline_TFPQ_wedges_QR_bs_lower <- tibble(
  productivity_order = c(241:1),
  `2.5%` = t(apply(baseline_TFPQ_wedges_QR_maxF_bs, 1, quantile, probs = c(0.05, 0.95),  na.rm = TRUE))[,1],
  `97.5%` = t(apply(baseline_TFPQ_wedges_QR_maxF_bs, 1, quantile, probs = c(0.05, 0.95),  na.rm = TRUE))[,2]
) %>% pivot_longer(cols = c(`2.5%`,`97.5%`), names_to = "Percentiles", values_to = "TFPQ_wedges_QR")


# Plot estimated TFPQ_wedges_QR schedule  
plot_baseline_TFPQ_wedges_QR_bound <- ggplot() + 
  geom_line(data = CI_baseline_TFPQ_wedges_QR_bs_upper, aes(x = (productivity_order/241)*100, y = TFPQ_wedges_QR, group = Percentiles), 
            alpha = 0.5, linetype = "dashed") + 
  geom_line(data = CI_baseline_TFPQ_wedges_QR_bs_lower, aes(x = (productivity_order/241)*100, y = TFPQ_wedges_QR, group = Percentiles), 
            alpha = 0.5, linetype = "dashed") + 
  geom_line(aes(x = (c(1:241)/241)*100, y = baseline_TFPQ_wedges_QR_noF$TFPQ_wedges_QR, color = "Upper"), linewidth = 1) + 
  geom_line(aes(x = (c(1:241)/241)*100, y = baseline_TFPQ_wedges_QR_maxF$TFPQ_wedges_QR, color = "Lower"), linewidth = 1) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "TFPQ Quantile Ratio") + 
  xlab(label = "TFPQ Percentile") + 
  scale_color_discrete(name = "Bound:") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  theme(legend.position = "bottom")

## joint plot 
ggarrange(
  plot_TFPQ_wedges_distrib + labs(caption="(A) TFPQ distributions C vs NC") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))), 
  plot_baseline_TFPQ_wedges_QR_bound + labs(caption="(B) TFPQ quantile ratio (with bounds)") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))),
  ncol = 2, nrow = 1)  

#### Step 3: measure & plot z_tilde variation ####

## Check that TFPQ is constructed correctly with wedges! 

# Construct z_tilde wedges
firms_1997 <- firms_1997 %>% 
  mutate(
    x_star_wedges = ((((1-vat_tax)*alpha_gross)/((1+capital_wedge)*R))^alpha_gross)*((((1-vat_tax)*beta_gross/(1+labor_wedge)))^beta_gross)*(gamma_gross^gamma_gross), 
    z_tilde_wedges = (TFPQ_wedges_resid*x_star_wedges)^(1/(1-eta_tilde_baseline))
  )

# z_tilde_wedges distribution 
plot_z_tilde_wedges_distrib <- firms_1997 %>% 
  mutate(
    connections = fct_rev(as.factor(connected)), 
    connected = case_when(
      connected == "connected" ~ "C", 
      connected == "non-connected" ~ "NC")) %>% 
  ggplot(aes(y = connected, group = connected, fill = connected)) + 
  geom_density_ridges(
    aes(x = log(z_tilde_wedges), # divide by 9040 to show in 2010 USD
        fill = connections),
    alpha = 0.8, color = "black", quantile_lines = TRUE, quantiles = 2, 
    show.legend = TRUE, bandwidth = 0.4) + 
  labs(
    x = "log z tilde",
    y = ""
  ) + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  scale_fill_discrete(name = "Type:") + 
  theme(legend.position = "bottom")

### Right panel: z_tilde_wedges_QR distribution implied by resid z_tilde_wedges 

# Get baseline z_tilde_wedges_QR function
compute_baseline_z_tilde_wedges_QR <- function(data_connect,data_nonconnect, cutoff){
  
  # Arrange from low to high
  data_connect <- data_connect %>%
    arrange(z_tilde_wedges)
  
  ## First get restricted productivity distribution
  
  ### Need to have specified cutoff! ### 
  
  # Now restrict normally 
  restricted_productivity_distribution <- data_nonconnect %>% 
    # remove NAs 
    filter(!is.na(z_tilde_wedges)) %>% 
    # only look at z_tilde_wedges
    select(c(z_tilde_wedges)) %>% 
    # order by z_tilde_wedges
    arrange(z_tilde_wedges) %>% 
    # drop cutoff*% of observations
    dplyr::slice(round(cutoff*n()):n())
  
  restricted_productivity_distribution <- restricted_productivity_distribution$z_tilde_wedges
  
  # Now get quantiles (weighted by rho if supplied)
  z_connected <- unname(quantile(
    x = restricted_productivity_distribution,
    probs = c(1:nrow_connected)/(nrow_connected+1),
  ))
  z_tilde_wedges_QR_connected <- data_connect$z_tilde_wedges/z_connected - 1 
  
  # return results
  return(list(productivity = z_connected, z_tilde_wedges_QR = z_tilde_wedges_QR_connected))
}

# Get bootstrapping z_tilde_wedges_QR function 
compute_bs_baseline_z_tilde_wedges_QR <- function(data_connect,data_nonconnect, cutoff){
  
  # create productivity variable
  data_connect$productivity <- NA
  
  # Arrange from high to low
  data_connect <- data_connect %>%
    arrange(desc(z_tilde_wedges))
  
  ## First get restricted productivity distribution
  
  # Now restrict normally 
  restricted_productivity_distribution <- data_nonconnect %>%
    # remove NAs 
    filter(!is.na(z_tilde_wedges)) %>% 
    # only look at TFPR
    select(c(z_tilde_wedges)) %>% 
    # order by TFPR
    arrange(z_tilde_wedges) %>% 
    # drop cutoff*% of observations
    dplyr::slice(round(cutoff*n()):n())
  
  ## Now, draw with replacement from restricted productivity distribution
  
  n_draws <- nrow(data_connect)
  
  # set seeds
  set.seed(123456)
  n_bootstraps = 10000
  bootstrap_seeds = rnorm(n = n_bootstraps, mean = 1000, sd = 100)
  
  # Get sampling function 
  take_sample <- Vectorize(function(data,seed,n_draws){
    set.seed(seed)
    return(sort(sample(x = data, size = n_draws, replace = TRUE), decreasing = T))
  }, vectorize.args = c("seed"))
  
  # bootstrap samples
  bs_productivities <- take_sample(data = restricted_productivity_distribution$z_tilde_wedges, 
                                   seed = bootstrap_seeds, n_draws = n_draws)
  
  bs_z_tilde_wedges_QR <- data_connect$z_tilde_wedges/bs_productivities - 1
  
  # return results
  return(bs_z_tilde_wedges_QR)
}

# baseline z_tilde_wedges_QR (without fixed cost)
baseline_z_tilde_wedges_QR_noF <- compute_baseline_z_tilde_wedges_QR(
  data_connect = firms_1997[firms_1997$connected == "connected",], 
  data_nonconnect = firms_1997[firms_1997$connected == "non-connected",], 
  cutoff = 0.0)

baseline_z_tilde_wedges_QR_noF_bs <- compute_bs_baseline_z_tilde_wedges_QR(
  data_connect = firms_1997[firms_1997$connected == "connected",], 
  data_nonconnect = firms_1997[firms_1997$connected == "non-connected",], 
  cutoff = 0.0)

## Summarize bootstrapped confidence bands  
CI_baseline_z_tilde_wedges_QR_bs_upper <- tibble(
  productivity_order = c(241:1),
  `2.5%` = t(apply(baseline_z_tilde_wedges_QR_noF_bs, 1, quantile, probs = c(0.05, 0.95),  na.rm = TRUE))[,1],
  `97.5%` = t(apply(baseline_z_tilde_wedges_QR_noF_bs, 1, quantile, probs = c(0.05, 0.95),  na.rm = TRUE))[,2]
) %>% pivot_longer(cols = c(`2.5%`,`97.5%`), names_to = "Percentiles", values_to = "z_tilde_wedges_QR")

# Plot estimated z_tilde_wedges_QR schedule  
plot_baseline_z_tilde_wedges_QR <- ggplot() + 
  geom_line(data = CI_baseline_z_tilde_wedges_QR_bs_upper, aes(x = (productivity_order/241)*100, y = z_tilde_wedges_QR, group = Percentiles), 
            alpha = 0.5, linetype = "dashed") + 
  geom_line(aes(x = (c(1:241)/241)*100, y = baseline_z_tilde_wedges_QR_noF$z_tilde_wedges_QR, color = "Upper"), linewidth = 1) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "z tilde Quantile Ratio") + 
  xlab(label = "z tilde Percentile") + 
  scale_color_discrete(name = "Bound:") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  theme(legend.position = "none")

plot_baseline_z_tilde_wedges_QR

## joint plot 
ggarrange(
  plot_z_tilde_wedges_distrib + labs(caption="(A) Z tilde distributions C vs NC") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))), 
  plot_baseline_z_tilde_wedges_QR + labs(caption="(B) Z tilde quantile ratio") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))),
  ncol = 2, nrow = 1)  

#### Show distribution of wedges! Correlate them with other measures, e.g. TFPQ & z_tilde #### 

### TFPQ - capital wedge correlation ### 
plot_TFPQ_capital_wedges <- ggplot() +
  geom_point(aes(x = firms_1997$log_TFPQ_wedges_resid[firms_1997$connected == "non-connected"], 
                 y = (1+firms_1997$capital_wedge[firms_1997$connected == "non-connected"])^(alpha_gross), 
                 color = "non-connected", group = "non-connected"), 
             size = 0.1, alpha = 0.2) + 
  geom_smooth(method='lm', formula= y~x, 
              aes(x = firms_1997$log_TFPQ_wedges_resid[firms_1997$connected == "non-connected"], 
                  y = (1+firms_1997$capital_wedge[firms_1997$connected == "non-connected"])^(alpha_gross), 
                  color = "non-connected", group = "non-connected")) + 
  geom_point(aes(x = firms_1997$log_TFPQ_wedges_resid[firms_1997$connected == "connected"], 
                 y = (1+firms_1997$capital_wedge[firms_1997$connected == "connected"])^(alpha_gross), 
                 color = "connected", group = "connected"), 
             size = 0.5, alpha = 0.75) + 
  geom_smooth(method='lm', formula= y~x, 
              aes(x = firms_1997$log_TFPQ_wedges_resid[firms_1997$connected == "connected"], 
                  y = (1+firms_1997$capital_wedge[firms_1997$connected == "connected"])^(alpha_gross), 
                  color = "connected", group = "connected")) + 
  ylab("Effective capital wedge") + 
  xlab("(measured) log TFPQ") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  scale_color_discrete(name = "Type:") + 
  theme(legend.position = "bottom") + 
  ylim(0.836,1.21)


### TFPQ - labor wedge correlation ### 
plot_TFPQ_labor_wedges <- ggplot() +
  geom_point(aes(x = firms_1997$log_TFPQ_wedges_resid[firms_1997$connected == "non-connected"], 
                 y = (1+firms_1997$labor_wedge[firms_1997$connected == "non-connected"])^(beta_gross), 
                 color = "non-connected", group = "non-connected"), 
             size = 0.1, alpha = 0.2) + 
  geom_smooth(method='lm', formula= y~x, 
              aes(x = firms_1997$log_TFPQ_wedges_resid[firms_1997$connected == "non-connected"], 
                  y = (1+firms_1997$labor_wedge[firms_1997$connected == "non-connected"])^(beta_gross), 
                  color = "non-connected", group = "non-connected")) + 
  geom_point(aes(x = firms_1997$log_TFPQ_wedges_resid[firms_1997$connected == "connected"], 
                 y = (1+firms_1997$labor_wedge[firms_1997$connected == "connected"])^(beta_gross), 
                 color = "connected", group = "connected"), 
             size = 0.5, alpha = 0.75) + 
  geom_smooth(method='lm', formula= y~x, 
              aes(x = firms_1997$log_TFPQ_wedges_resid[firms_1997$connected == "connected"], 
                  y = (1+firms_1997$labor_wedge[firms_1997$connected == "connected"])^(beta_gross), 
                  color = "connected", group = "connected")) + 
  ylab("Effective labor wedge") + 
  xlab("(measured) TFPQ") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  scale_color_discrete(name = "Type:") + 
  theme(legend.position = "bottom") + 
  ylim(0.8719,1.1491)

# So "wedges" are increasing in TFPQ. 
# Conditional on TFPQ, looks pretty similar for connected firms 
# Higher wedges of connected firms mostly explained by higher (measured) TFPQ 
# Conditional on TFPQ, connected firms tend to face slightly LOWER taxes. 

## Create joint plot (for Appendix) 
ggarrange(
  plot_TFPQ_labor_wedges + labs(caption="(A) Correlation: labor wedges and TFPQ") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))), 
  plot_TFPQ_capital_wedges + labs(caption="(B) Correlation: capital wedges and TFPQ") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))),
  ncol = 2, nrow = 1)  
ggsave(width = 12, height = 5, dpi = 200,
       "R/output/Figure_appendix_labor_capital_wedges_TFPQ.png")


## Show summary plot (with summary of wedges) 
firms_1997_wedges <- firms_1997 %>% 
  mutate(
    joint_effective_wedge = ((1+capital_wedge)^alpha_gross)*((1+labor_wedge)^beta_gross)
  ) %>% 
  filter(!is.na(joint_effective_wedge)) # drop single observation with missing labor wedge 

plot_joint_wedges_TFPQ_cor <- ggplot() +
  geom_point(aes(x = firms_1997_wedges$log_TFPQ_wedges_resid[firms_1997_wedges$connected == "non-connected"], 
                 y = (firms_1997_wedges$joint_effective_wedge[firms_1997_wedges$connected == "non-connected"]), 
                 color = "non-connected", group = "non-connected"), 
             size = 0.1, alpha = 0.2) + 
  geom_smooth(method='lm', formula= y~x, 
              aes(x = firms_1997_wedges$log_TFPQ_wedges_resid[firms_1997_wedges$connected == "non-connected"], 
                  y = (firms_1997_wedges$joint_effective_wedge[firms_1997_wedges$connected == "non-connected"]), 
                  color = "non-connected", group = "non-connected")) + 
  geom_point(aes(x = firms_1997_wedges$log_TFPQ_wedges_resid[firms_1997_wedges$connected == "connected"], 
                 y = (firms_1997_wedges$joint_effective_wedge[firms_1997_wedges$connected == "connected"]), 
                 color = "connected", group = "connected"), 
             size = 0.5, alpha = 0.75) + 
  geom_smooth(method='lm', formula= y~x, 
              aes(x = firms_1997_wedges$log_TFPQ_wedges_resid[firms_1997_wedges$connected == "connected"], 
                  y = (firms_1997_wedges$joint_effective_wedge[firms_1997_wedges$connected == "connected"]), 
                  color = "connected", group = "connected")) + 
  ylab("Effective total wedge") + 
  xlab("log TFPQ") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  scale_color_discrete(name = "Type:") + 
  theme(legend.position = "bottom") + 
  ylim(0.729,1.379)

#### Save joint plot for main paper ####

ggarrange(
  plot_joint_wedges_TFPQ_cor + labs(caption="(A) Correlation: wedges and TFPQ") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))), 
  plot_baseline_TFPQ_wedges_QR_bound + labs(caption="(B) TFPQ quantile ratio (with bounds)") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))),
  ncol = 2, nrow = 1)  
ggsave(width = 12, height = 5, dpi = 200,
       "R/output/Figure_main_wedge_extension.png")

#### Estimate wedge-related parameters that can be directly estimated ####

# Means: C >> NC (but driven by higher TFPQ)
mean_tau_C <- mean(log(firms_1997_wedges$joint_effective_wedge[firms_1997_wedges$connected == "connected"]))
mean_tau_NC <- mean(log(firms_1997_wedges$joint_effective_wedge[firms_1997_wedges$connected == "non-connected"]))

# Variances: C > NC (also driven by higher TFPQ?) 
var_tau_C <- var(log(firms_1997_wedges$joint_effective_wedge[firms_1997_wedges$connected == "connected"]))
var_tau_NC <- var(log(firms_1997_wedges$joint_effective_wedge[firms_1997_wedges$connected == "non-connected"]))

# Correlation of wedges and productivity for non-connected: 
lm_cor_wedges_TFPQ_NC <- lm(
  data = firms_1997_wedges[firms_1997_wedges$connected == "non-connected",],
  formula = log(joint_effective_wedge) ~ log_TFPQ_wedges_resid)
rho_NC <- lm_cor_wedges_TFPQ_NC$coefficients[["log_TFPQ_wedges_resid"]]/(sqrt(var_tau_NC)/sd(log(firms_1997_wedges$TFPQ_wedges_resid[firms_1997_wedges$connected == "non-connected"])))

# Correlation of wedges and productivity for connected (target this below!) 
lm_cor_wedges_TFPQ_C <- lm(
  data = firms_1997_wedges[firms_1997_wedges$connected == "connected",],
  formula = log(joint_effective_wedge) ~ log_TFPQ_wedges_resid)
beta_coef_TFPQ_wedges_C <- lm_cor_wedges_TFPQ_C$coefficients[["log_TFPQ_wedges_resid"]]

###### Estimate Connections Technology with wedges ######

#### Prepare objects & set grid length ####

## To speed up estimation: Reduce length for epsilon & wedges (can still keep z_star as long as before!)
grid_dense_length_wedges <- 100

# create z_tilde_function
z_tilde_wedges <- function(z_star,x_star){
  return( (z_star*x_star)^(1/(1-eta_tilde_baseline)) )
}

# targeted moment 
empirical_QR_wedges <- baseline_TFPQ_wedges_QR_noF$TFPQ_wedges_QR 

# Extract z_star_NC_wedges
z_star_NC_wedges <- firms_1997_wedges$TFPQ_wedges_resid[firms_1997_wedges$connected == "non-connected"]

# get parameters based on z_star 
mean(log(z_star_NC_wedges))
var(log(z_star_NC_wedges))

# get density distribution over z_star_NC_wedges
z_star_wedges_marginal_distrib <- approxfun(density(log(z_star_NC_wedges)))(log(z_star_NC_wedges))

# specify grid here!! Want to range over z_i for non-connected firms (don't need very many points!) 
z_star_wedges_grid <- seq(from = unname(quantile(z_star_NC_wedges,probs = 0.0)), 
                          to = unname(quantile(z_star_NC_wedges,probs = 1)), 
                          length.out = grid_length)

z_star_wedges_grid_dense <- exp(seq(from = min(log(z_star_NC_wedges)), 
                                    to = max(log(z_star_NC_wedges)), 
                                    length.out = 500)) # can still keep length of z long 

# get density distribution over z_star grids 
z_star_wedges_grid_marginal_distrib <- approxfun(density(z_star_NC_wedges))(z_star_wedges_grid)
z_star_wedges_grid_marginal_distrib <- z_star_wedges_grid_marginal_distrib/sum(z_star_wedges_grid_marginal_distrib)

z_star_wedges_grid_dense_marginal_distrib <- approxfun(density(z_star_NC_wedges))(z_star_wedges_grid_dense)
z_star_wedges_grid_dense_marginal_distrib <- z_star_wedges_grid_dense_marginal_distrib/sum(z_star_wedges_grid_dense_marginal_distrib)

# # can export z_star_grid with distribution
# write_csv(x = tibble(z_star = z_star_wedges_grid_dense, distrib = z_star_wedges_grid_dense_marginal_distrib), 
#           file = "Computation/data/z_star_grid_wedges.csv")

# need this for later
lowest_TFPQ_connected <- min(firms_1997_wedges$TFPQ_wedges_resid[firms_1997_wedges$connected == "connected"])

### Create z_star vectors

# z_star grid dense long
z_star_wedges_grid_dense_long <- (crossing(
  z_star = z_star_wedges_grid_dense,
  epsilon = c(1:grid_dense_length_wedges), 
  wedge = c(1:grid_dense_length_wedges)))$z_star

# also make z star grid marginal distrib long
z_star_wedges_grid_dense_marginal_distrib_long <- approxfun(density(log(z_star_NC_wedges)))(log(z_star_wedges_grid_dense_long))

#### Function to get joint distribution of subsidies over (z*,eps,wedges) ####

# Function with 
compute_subsidy_DRS_eps_cost_wedges_model_joint <- function(parameters, n_sampling = 10000){
  
  print(parameters)
  
  ##### Step 1: Prepare joint epsilon, wedge & productivity grid #####
  
  ### get good range for epsilon (simulating min and max)
  
  # Maximum (already take into account that correlation will be negative!!) 
  set.seed(12345)
  min_z_star <- unname(quantile(x = z_star_NC_wedges, probs = c(0.025))) # take 2.5% percentile
  random_draw_eps_max <- rnorm(
    n = 50000, 
    mean = parameters["alpha_eps"] + parameters["beta_eps"]*log(min_z_star),
    sd = sqrt(parameters["variance_eps_z"]))
  eps_max <- unname(quantile(x = random_draw_eps_max, probs = c(0.975))) # take 97.5% percentile 
  
  # Minimum 
  set.seed(12345)
  max_z_star <- unname(quantile(x = z_star_NC_wedges, probs = c(0.975))) # take 97.5% percentile
  random_draw_eps_min <- rnorm(
    n = 50000, 
    mean = parameters["alpha_eps"] + parameters["beta_eps"]*log(max_z_star),
    sd = sqrt(parameters["variance_eps_z"]))
  eps_min <- max(parameters["cost_level"],unname(quantile(x = random_draw_eps_min, probs = c(0.025)))) # make sure that epsilon is not below c (otherwise, no rent-seeking)
  
  #
  if(eps_min > eps_max){
    print("Warning: eps min > eps max, will set eps max larger")
    eps_max <- 2*eps_min
  }
  
  ### get good range for wedge (simulating min and max)
  
  var_log_wedge_z <- (1 - parameters["corr_wedge_z"]^2)*var_tau_C
  sd_tau_C <- sqrt(var_tau_C)
  mean_log_z_star <- mean(log(z_star_NC_wedges))
  sd_log_z_star <- sd(log(z_star_NC_wedges))
  
  # Minimum (already take into account that correlation is positive!!) 
  set.seed(12345)
  random_draw_wedge_min <- rnorm(
    n = 50000, 
    mean = mean_tau_C + parameters["corr_wedge_z"]*(sd_tau_C/sd_log_z_star)*(log(min_z_star) - mean_log_z_star),
    sd = sqrt(var_log_wedge_z)) 
  wedge_min <- unname(quantile(x = random_draw_wedge_min, probs = c(0.025))) # take 2.5% percentile 
  
  # Maximum 
  set.seed(12345)
  random_draw_wedge_max <- rnorm(
    n = 50000, 
    mean = mean_tau_C + parameters["corr_wedge_z"]*(sd_tau_C/sd_log_z_star)*(log(max_z_star) - mean_log_z_star),
    sd = sqrt(var_log_wedge_z))
  wedge_max <- unname(quantile(x = random_draw_wedge_max, probs = c(0.975)))
  
  # 
  if(wedge_min > wedge_max){
    print("Warning: wedge min > wedge max, will set wedge max larger")
    wedge_max <- 2*wedge_min
  }
  
  ### Create joint grid 
  
  joint_grid <- crossing(
    z_star = z_star_wedges_grid_dense,
    epsilon = seq(
      from = eps_min,
      to = eps_max, 
      length.out = grid_dense_length_wedges),
    log_wedge = seq(
      from = wedge_min,
      to = wedge_max, 
      length.out = grid_dense_length_wedges))
  
  z_star_wedges_grid_dense_long <- joint_grid$z_star
  epsilon_grid_dense_long <- joint_grid$epsilon 
  wedge_grid_dense_long <- exp(joint_grid$log_wedge)
  
  print("Step 1 done.")
  
  ##### Step 2: Get probability distribution over states #####
  
  # Create df
  results_df <- tibble(
    z_star = z_star_wedges_grid_dense_long,
    epsilon = epsilon_grid_dense_long,
    wedge = wedge_grid_dense_long, 
    z_star_wedges_marginal_distrib = z_star_wedges_grid_dense_marginal_distrib_long, 
    epsilon_condit_distrib = dnorm(
      x = epsilon_grid_dense_long,
      mean = parameters["alpha_eps"] + parameters["beta_eps"]*log(z_star_wedges_grid_dense_long),
      sd = sqrt(parameters["variance_eps_z"])), 
    wedge_condit_distrib = dnorm(
      x = log(wedge_grid_dense_long),
      mean = mean_tau_C + parameters["corr_wedge_z"]*(sd_tau_C/sd_log_z_star)*(log(z_star_wedges_grid_dense_long) - mean_log_z_star),
      sd = sqrt(var_log_wedge_z)) 
  )
  
  # Get joint density for each state
  results_df <- results_df %>% 
    group_by(z_star) %>% 
    mutate(
      # first normalize within z_star
      epsilon_condit_distrib = epsilon_condit_distrib/sum(epsilon_condit_distrib), 
      wedge_condit_distrib = wedge_condit_distrib/sum(wedge_condit_distrib),
      joint_condit_distrib = (epsilon_condit_distrib*wedge_condit_distrib)/sum(epsilon_condit_distrib*wedge_condit_distrib)
    ) %>% ungroup() %>% 
    mutate(
      joint_density = joint_condit_distrib*z_star_wedges_marginal_distrib, 
      joint_density = joint_density/sum(joint_density)
      # # then get joint density: product of marginal & condit distrib  
      # joint_density_epsilon = epsilon_condit_distrib*z_star_wedges_marginal_distrib,
      # joint_density_epsilon = joint_density_epsilon/sum(joint_density_epsilon),
      # # then get joint density: product of marginal & condit distrib  
      # joint_density_wedge = wedge_condit_distrib*z_star_wedges_marginal_distrib,
      # joint_density_wedge = joint_density_wedge/sum(joint_density_wedge), 
      # # Now combine for full joint density (simply multiply given independence)
      # joint_density = joint_density_epsilon*joint_density_wedge,
      # joint_density = joint_density/sum(joint_density)
    )
  
  ## Next, draw from joint densities to reduce dimensionality 
  
  # draw representative sub-sample from state space 
  set.seed(12345) 
  sampled_entries <- sample(
    x = c(1:nrow(results_df)), 
    size = n_sampling, replace = T, 
    prob = results_df$joint_density)
  
  # restrict results to sampled entries (to reduce dimensionality)
  results_df <- results_df[sampled_entries,]
  
  # now compute x_star & z_tilde
  results_df$x_star <- ( (((1-vat_tax)*alpha_gross/R)^alpha_gross)*(((1-vat_tax)*beta_gross/wage)^beta_gross)*((gamma_gross/P)^gamma_gross) )*(1/results_df$wedge)
  results_df$z_tilde <- z_tilde_wedges(
    z_star = results_df$z_star,  
    x_star = results_df$x_star)
  
  # make sure to save wedges 
  wedges <- results_df$wedge
  
  print("Step 2 done.")
  
  ##### Step 3: Fill (restricted) results df with optimal choices  #####
  
  # Directly estimate results for restricted results_df sample
  results_df <- compute_subsidy_DRS_eps_cost_model_grid(
    z_star_grid = results_df$z_star,
    epsilon_grid = results_df$epsilon,
    z_tilde_grid = results_df$z_tilde,
    parameters = parameters)
  
  # make sure to add wedges back in 
  results_df$wedge <- wedges 
  
  print("Step 3 done.")
  
  ##### Step 4: Get results & return #####
  
  # restrict to firms that would choose to become connected
  connected_df <- results_df[results_df$choose_C,]
  
  # Build quantiles of TFPR distribution of connected firms in data
  connected_df <- connected_df %>% arrange(TFPQ_model)
  
  quantiles_connected_TFPQ_model <- unname(quantile(
    x = connected_df$TFPQ_model,
    probs = c(1:nrow_connected)/(nrow_connected+1)
  ))
  
  # Compute Quantile ratio in Model
  model_QR <- (compute_baseline_TFPQ_wedges_QR(
    data_connect = tibble(
      index = c(1:241),
      TFPQ_wedges_resid = quantiles_connected_TFPQ_model),
    data_nonconnect = results_df %>% rename(TFPQ_wedges_resid = z_star),
    cutoff = 0.0))$TFPQ_wedges_QR
  
  # evaluate objective over quantiles of TFPQ
  objective_QR_moment <- sum( (empirical_QR_wedges - model_QR)^2 )
  objective_QR_moment_norm <- objective_QR_moment/(sum( (mean(empirical_QR_wedges) - empirical_QR_wedges)^2 ))
  
  # Also compute implied probability of becoming connected
  proba_C <- sum(results_df$choose_C)/n_sampling
  
  # Compute coefficient for correlation of TFPQ and wedges 
  lm_cor_wedges_TFPQ_C_model <- lm(
    data = connected_df, 
    formula = log(wedge) ~ log(TFPQ_model)
  )
  beta_coef_TFPQ_wedges_C_model <- lm_cor_wedges_TFPQ_C_model$coefficients[["log(TFPQ_model)"]]
  
  # construct objective for beta coefficient 
  objective_beta_coef <- abs((beta_coef_TFPQ_wedges_C - beta_coef_TFPQ_wedges_C_model)/beta_coef_TFPQ_wedges_C)
  
  # return
  return(list(objective_QR = objective_QR_moment_norm,
              objective_beta = objective_beta_coef, 
              objective = objective_QR_moment_norm + objective_beta_coef,
              results_df = results_df,
              quantiles_model = quantiles_connected_TFPQ_model,
              proba_C = proba_C
  ))
}

#### Find optimal parameters #### 

# start with initial guess 
guess_param_wedges <- c(
  "corr_wedge_z" = rho_NC,
  "fixed_cost" = 0.0, 
  "theta" = 0.2, 
  "alpha_eps" = 0.041, 
  "beta_eps" = -0.011, 
  "variance_eps_z" = 4.656e-07, 
  "cost_level" = 1e-08, 
  "cost_elasticity" = 1.04)

# check that get results given guess 
test_DRS_wedges <- compute_subsidy_DRS_eps_cost_wedges_model_joint(parameters = guess_param_wedges, n_sampling = 10000)

## Objective(s) seem to work! 
test_DRS_wedges$objective
test_DRS_wedges$objective_beta
test_DRS_wedges$objective_QR

# optimal subsidies
summary(test_DRS_wedges$results_df$optimal_subsidy)

# test that marginal distribution of z* is indeed the same!! Yes! 
ggplot() + 
  geom_density(aes(x = log(test_DRS_wedges$results_df$z_star), color = "Model")) + 
  geom_density(aes(x = log(z_star_NC_wedges), color = "Data"))

# start with good guess (found from running many different coarser optimizations before)
initial_parameter_wedges <- c(
  "corr_wedge_z" = 0.795,
  "alpha_eps" = 0.0744, 
  "beta_eps" = -0.01287, 
  "variance_eps_z" = 4.7726e-07, 
  "cost_level" = 1.023e-08)

## Start optim parallel (fixes elasticities, they were already found based on grid search)
cl <- makeCluster(detectCores() - 2, type="FORK")
setDefaultCluster(cl = cl)

# Find optimal parameters (starting from good initial guess & relatively tight bounds!)
optimal_wedges <- optimParallel( 
  par = initial_parameter_wedges/initial_parameter_wedges, 
  fn = function(par){
    parameters <- par*initial_parameter_wedges
    
    # check parameter combinations 
    if(mean(parameters["alpha_eps"] + parameters["beta_eps"]*log(z_star_NC_wedges)) < 0.01){
      return(1e12*(1/parameters["alpha_eps"]))
    } else{
      results <- compute_subsidy_DRS_eps_cost_wedges_model_joint(
        parameters = c(parameters,"fixed_cost" = 0.0,
                       "theta" = 0.2,"cost_elasticity" = 1.04), 
        n_sampling = 10000)
      print(paste("Objective is: ", results$objective))
      return(results$objective)
    }
  },
  method = "L-BFGS-B",
  lower = c(0.75,0.5,0.5,0.5,0.5), # 0.75,
  upper = c(1.175,2.0,2.0,2.0,2.0) # 1.01,
)

stopCluster(cl) 

optimal_wedges$par*initial_parameter_wedges
optimal_wedges$value # Overall objective = 0.197 (R2 = 0.803)
optimal_wedges$message # Converged

# solve for correlation rho: -0.999
ratio_beta_sigma_wedges <- ((optimal_wedges$par*initial_parameter_wedges)["beta_eps"])/sqrt((optimal_wedges$par*initial_parameter_wedges)["variance_eps_z"])
estimate_rho_wedges <- ratio_beta_sigma_wedges/sqrt((ratio_beta_sigma_wedges)^2 + 1)

# save parameters 
parameters_wedges <- c(
  optimal_wedges$par*initial_parameter_wedges,
  "fixed_cost" = 0.0, 
  "theta" = 0.2, 
  "cost_elasticity" = 1.04, 
  "proba_c" = nrow_connected/(nrow_connected+nrow_non_connected), 
  "rho" = unname(estimate_rho_wedges)
)

# also save as dataframe (to read in Julia)
parameters_df_wedges <- tibble(
  "corr_wedge_z" = (optimal_wedges$par*initial_parameter_wedges)[[1]],
  "alpha_eps" = (optimal_wedges$par*initial_parameter_wedges)[[2]],
  "beta_eps" = (optimal_wedges$par*initial_parameter_wedges)[[3]],
  "variance_eps_z" = (optimal_wedges$par*initial_parameter_wedges)[[4]],
  "cost_level" = (optimal_wedges$par*initial_parameter_wedges)[[5]],
  "fixed_cost" = 0.0, 
  "theta" = 0.2, 
  "cost_elasticity" = 1.04, 
  "proba_c" = nrow_connected/(nrow_connected+nrow_non_connected), 
  "rho" = unname(estimate_rho_wedges),
  "mean_tau_C" = mean_tau_C, 
  "mean_tau_NC" = mean_tau_NC, 
  "sd_tau_C" = sqrt(var_tau_C),
  "sd_tau_NC" = sqrt(var_tau_NC),
  "mean_log_z_star" = mean(log(z_star_NC_wedges)),
  "sd_log_z_star" = sd(log(z_star_NC_wedges)),
  "corr_wedge_z_NC" = rho_NC,
  "corr_wedge_z_C" = (optimal_wedges$par*initial_parameter_wedges)[[1]]
)
# write_csv(x = parameters_df_wedges, file = "Computation/data/parameters_wedges.csv")

# now find these results 
results_optimal_wedges <- compute_subsidy_DRS_eps_cost_wedges_model_joint(
  parameters = c(optimal_wedges$par*initial_parameter_wedges,
                 "fixed_cost" = 0.0, "theta" = 0.2, "cost_elasticity" = 1.04), 
  n_sampling = 50000)

results_optimal_wedges$objective_beta # Only deviates by 6.7%
results_optimal_wedges$objective_QR # R2 on QR is at 84%


#### Save joint plot for main paper (Figure 6) ####

# compute TFPQ quantile ratio in model 
model_wedges_TFPQ_QR_noF <- compute_baseline_TFPQ_wedges_QR(
  data_connect = tibble(
    index = c(1:241),
    TFPQ_wedges_resid = results_optimal_wedges$quantiles_model), 
  data_nonconnect = results_optimal_wedges$results_df %>% 
    rename(TFPQ_wedges_resid = z_star), 
  cutoff = 0.0)

# Plot estimated TFPQ QR schedule  
plot_main_TFPQ_wedges_QR_fit <- ggplot() + 
  geom_line(data = CI_baseline_TFPQ_wedges_QR_bs_upper, aes(x = (productivity_order/241)*100, y = TFPQ_wedges_QR, group = Percentiles), 
            alpha = 0.5, linetype = "dashed") + 
  geom_point(aes(x = (c(1:241)/241)*100, y = baseline_TFPQ_wedges_QR_noF$TFPQ_wedges_QR, color = "Data"), shape = 4) + 
  geom_line(aes(x = (c(1:241)/241)*100, y = model_wedges_TFPQ_QR_noF$TFPQ_wedges_QR, color = "Model"), linewidth = 1.0) + 
  geom_hline(yintercept = 0, linetype = "longdash") + 
  ylab(label = "TFPQ Quantile Ratio") + 
  xlab(label = "TFPQ Percentiles") + 
  scale_color_manual(values = c("#e74c3c","#3498db"), name = "Type:") + 
  theme_classic(base_size = 16, base_family = "LM Roman 10") + 
  theme(legend.position = "bottom")

## Create joint plot with results (incl estimation fit) 
ggarrange(
  plot_joint_wedges_TFPQ_cor + labs(caption="(A) Correlation: wedges and TFPQ") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))), 
  plot_main_TFPQ_wedges_QR_fit + labs(caption="(B) TFPQ quantile ratio") + theme(plot.caption = element_text(hjust=0.5, size=rel(1.2))),
  ncol = 2, nrow = 1)
ggsave(width = 12, height = 5, dpi = 200,
       "R/output/Figure6_wedge_extension.pdf")

###### Remaining estimation ######
#### Relative labor and capital wedges conditional on total wedges ####

# Test: Do relative labor and capital wedges correlate with productivity conditional on total wedge?

firms_1997_wedges <- firms_1997_wedges %>% 
  mutate(
    capital_wedge_incl = (1+capital_wedge)^alpha_gross, 
    labor_wedge_incl = (1+labor_wedge)^beta_gross,
    capital_wedge_share = log(capital_wedge_incl)/(log(capital_wedge_incl) + log(labor_wedge_incl)),
    labor_wedge_share = 1 - capital_wedge_share, 
    FE_total_wedge = ntile(x = log(joint_effective_wedge), n = 250)
  )

### Test: Look at shares and see if they systematically vary with total wedge & TFPQ 

# No, capital wedge share does not vary with total wedge (effect is very small; not significant if dont restrict)
summary(fixest::feols(
  data = firms_1997_wedges %>% filter(connected == "non-connected" & capital_wedge_share >= -5 & capital_wedge_share <= 5),
  fml = capital_wedge_share ~ log(joint_effective_wedge)))
# Why is effect small? One unit increase in log(joint_effective_wedge) gives 24pp higher capital share, but IQR of log(joint_effective_wedge) is only 0.22, so IQR gives 0.05pp (which is small)

# No correlation with TFPQ conditional on total wedge
summary(fixest::feols(
  data = firms_1997_wedges %>% filter(connected == "non-connected"  & capital_wedge_share >= 0 & capital_wedge_share <= 1),
  fml = capital_wedge_share ~ log_TFPQ_wedges_resid | FE_total_wedge))

## Conclusion: Warranted to just randomly draw relative labor/capital wedge given total wedge 

## Before exporting winsorize 
capital_wedge_share_NC <- firms_1997_wedges$capital_wedge_share[firms_1997_wedges$connected == "non-connected"]
capital_wedge_share_NC[capital_wedge_share_NC < quantile(x = capital_wedge_share_NC, probs = 0.05)] <- unname(quantile(x = capital_wedge_share_NC, probs = 0.05))
capital_wedge_share_NC[capital_wedge_share_NC > quantile(x = capital_wedge_share_NC, probs = 0.95)] <- unname(quantile(x = capital_wedge_share_NC, probs = 0.95))

capital_wedge_share_C <- firms_1997_wedges$capital_wedge_share[firms_1997_wedges$connected == "connected"]
capital_wedge_share_C[capital_wedge_share_C < quantile(x = capital_wedge_share_C, probs = 0.05)] <- unname(quantile(x = capital_wedge_share_C, probs = 0.05))
capital_wedge_share_C[capital_wedge_share_C > quantile(x = capital_wedge_share_C, probs = 0.95)] <- unname(quantile(x = capital_wedge_share_C, probs = 0.95))

# # export relative capital share
# write_csv(x = tibble(capital_wedge_share = capital_wedge_share_NC), file = "Computation/data/capital_wedge_share_NC.csv")
# write_csv(x = tibble(capital_wedge_share = capital_wedge_share_C), file = "Computation/data/capital_wedge_share_C.csv")


#######################################################
###### Section 5.2 Extension: Production Network ###### 
#######################################################

### This section requires the full micro data (access to industry-level info), see full code at: main_results_final.R
